library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.2.2Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.3      ✔ forcats 0.5.2 Warning: package ‘ggplot2’ was built under R version 4.2.2── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(lubridate)

Attaching package: ‘lubridate’

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(caret)
Warning: package ‘caret’ was built under R version 4.2.2Loading required package: lattice
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     

Attaching package: ‘caret’

The following object is masked from ‘package:purrr’:

    lift
library(broom)
Warning: package ‘broom’ was built under R version 4.2.2
library(fastDummies)
Warning: package ‘fastDummies’ was built under R version 4.2.2
library(GGally)
Warning: package ‘GGally’ was built under R version 4.2.2Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2
library(ggfortify)
Warning: package ‘ggfortify’ was built under R version 4.2.2
library(mosaic)
Warning: package ‘mosaic’ was built under R version 4.2.2Registered S3 method overwritten by 'mosaic':
  method                           from   
  fortify.SpatialPolygonsDataFrame ggplot2

The 'mosaic' package masks several functions from core packages in order to add 
additional features.  The original behavior of these functions should not be affected by this.

Attaching package: ‘mosaic’

The following object is masked from ‘package:Matrix’:

    mean

The following object is masked from ‘package:caret’:

    dotPlot

The following objects are masked from ‘package:dplyr’:

    count, do, tally

The following object is masked from ‘package:purrr’:

    cross

The following object is masked from ‘package:ggplot2’:

    stat

The following objects are masked from ‘package:stats’:

    binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test, quantile, sd, t.test, var

The following objects are masked from ‘package:base’:

    max, mean, min, prod, range, sample, sum
library(mosaicData)
library(modelr)
Warning: package ‘modelr’ was built under R version 4.2.2
Attaching package: ‘modelr’

The following object is masked from ‘package:mosaic’:

    resample

The following object is masked from ‘package:ggformula’:

    na.warn

The following object is masked from ‘package:broom’:

    bootstrap
library(relaimpo)
Warning: package ‘relaimpo’ was built under R version 4.2.2Loading required package: MASS

Attaching package: ‘MASS’

The following object is masked from ‘package:dplyr’:

    select

Loading required package: boot

Attaching package: ‘boot’

The following object is masked from ‘package:mosaic’:

    logit

The following object is masked from ‘package:lattice’:

    melanoma

Loading required package: survey
Warning: package ‘survey’ was built under R version 4.2.2Loading required package: grid
Loading required package: survival

Attaching package: ‘survival’

The following object is masked from ‘package:boot’:

    aml

The following object is masked from ‘package:caret’:

    cluster


Attaching package: ‘survey’

The following object is masked from ‘package:graphics’:

    dotchart

Loading required package: mitools
Warning: package ‘mitools’ was built under R version 4.2.2This is the global version of package relaimpo.

If you are a non-US user, a version with the interesting additional metric pmvd is available

from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
library(tidyverse)
library(glmulti)
Warning: package ‘glmulti’ was built under R version 4.2.2Loading required package: rJava
Loading required package: leaps
Warning: package ‘leaps’ was built under R version 4.2.2
# Read the raw avocado sales data and convert the column names to snake_case.
avocados <- here::here("weekend/data/avocado.csv") %>%
  read_csv() %>%
  clean_names()
New names:Rows: 18249 Columns: 14── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr   (2): type, region
dbl  (11): ...1, AveragePrice, Total Volume, 4046, 4225, 4770, Total Bags, Small Bags, Large Bags, XLarge Bags, year
date  (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sanity check of the region hierarchy for a single reference week: compare
# the "TotalUS" rows with the sums over the eight large regions and with the
# sums over every other region.

# The eight large regions, listed once so the two filters below cannot drift.
major_regions <- c("Midsouth", "Northeast", "Plains", "SouthCentral",
                   "Southeast", "West", "GreatLakes", "California")

# Reference week, built as a Date so the comparison with the `date` column
# does not rely on implicit character-to-Date coercion.
check_date <- as.Date("2015-12-27")

# Every region label present in the data.
avocados %>%
  distinct(region)

# The "TotalUS" rows for the reference week.
avocados %>%
  filter(region == "TotalUS", date == check_date)

# Sums over the eight large regions, per type.
avocados %>%
  filter(region %in% major_regions, date == check_date) %>%
  group_by(type) %>%
  summarise(total_volume = sum(total_volume),
            x4046 = sum(x4046),
            total_bags = sum(total_bags))

# Sums over everything that is neither "TotalUS" nor a large region, per type.
avocados %>%
  filter(!region %in% c("TotalUS", major_regions), date == check_date) %>%
  group_by(type) %>%
  summarise(total_volume = sum(total_volume),
            x4046 = sum(x4046),
            total_bags = sum(total_bags))

# Midsouth, Northeast, Plains, SouthCentral, Southeast, West, GreatLakes, California
# These are the regions that make up the total US. The code above shows that the category totals summed over these regions are the same as the TotalUS figures.

# I will filter the data so that it only uses the rows for the above regions, to avoid double counting.
# There is no need to worry about the smaller areas, as these are contained within the bigger regions.
# I also don't think it makes sense to look at regions that small,
# and by the looks of it the remaining cities aren't an exhaustive list.

# It might be good to extract the month from the date as a categorical variable.
# Proportions of 4046, 4225 and 4770 may be better than absolutes; proportions of bag sizes may also be better than absolute numbers.
# Check whether the three bag-size columns add up to total_bags within the
# eight large regions: `a` is the difference of the rounded sums, sorted with
# the largest differences first.
avocados %>%
  filter(
    region %in% c(
      "Midsouth", "Northeast", "Plains", "SouthCentral",
      "Southeast", "West", "GreatLakes", "California"
    )
  ) %>%
  mutate(
    a = round(small_bags + large_bags + x_large_bags, 2) -
      round(total_bags, 2)
  ) %>%
  arrange(desc(a))

# Print the data for inspection.
avocados

# Fit price on every other column and look for aliased (linearly dependent)
# terms.
lm(average_price ~ ., data = avocados) %>%
  alias()
Model :
average_price ~ x1 + date + total_volume + x4046 + x4225 + x4770 + 
    total_bags + small_bags + large_bags + x_large_bags + type + 
    year + region
# For some reason the bag sizes don't properly add up to total_bags:
# without the round they differ by something like 0.000000000001,
# and with the round there are a couple at either end that are out by +-1.
# This means they don't show up in alias - I'm going to manually remove x_large_bags, as it can be derived from the other three.
# x4046, x4225 and x4770 don't add up to the total volume, so all of them can be kept.
# Keep only the eight large regions, derive a month factor from the date,
# turn year and every character column into factors, then drop the row id,
# the raw date and x_large_bags.
# select() is namespaced because MASS masks dplyr::select.
avocados <- avocados %>%
  filter(
    region %in% c(
      "Midsouth", "Northeast", "Plains", "SouthCentral",
      "Southeast", "West", "GreatLakes", "California"
    )
  ) %>%
  mutate(
    month = as.factor(month(date)),
    across(where(is.character), as.factor),
    year = as.factor(year)
  ) %>%
  dplyr::select(-c(x1, date, x_large_bags))
  
# Hold out 20% of the rows as a test set; the remaining rows are used for
# fitting. The seed is fixed so that the split, and therefore every model
# fitted on `train`, is reproducible from run to run.
set.seed(2023)

n_data <- nrow(avocados)

# floor() makes the truncation of the non-integer sample size explicit, and
# seq_len() stays well-defined for an empty data frame (1:0 would be c(1, 0)).
test_index <- sample(seq_len(n_data), size = floor(n_data * 0.2))

test <- avocados %>%
  slice(test_index)

train <- avocados %>%
  slice(-test_index)

# Baseline model: average price explained by total volume alone.
lm1 <- lm(average_price ~ total_volume, train)

summary(lm1)

Call:
lm(formula = average_price ~ total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.67197 -0.17047 -0.01404  0.14308  0.99220 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.596e+00  7.024e-03  227.20   <2e-16 ***
total_volume -1.055e-07  2.167e-09  -48.68   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2424 on 2162 degrees of freedom
Multiple R-squared:  0.5229,    Adjusted R-squared:  0.5227 
F-statistic:  2369 on 1 and 2162 DF,  p-value: < 2.2e-16
# Plot the fitted baseline model, then the base-R diagnostic plots.
plotModel(lm1)

plot(lm1)


# The diagnostics look bad; I think this is because of the organic type.
# Residuals of lm1 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm1) %>%
  dplyr::select(-average_price, -total_volume) %>%
  dplyr::select(resid, everything())


# Residuals against the categorical predictors. The predicate is wrapped in
# where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()
Warning: Use of bare predicate functions was deprecated in tidyselect 1.1.0.
Please use wrap predicates in `where()` instead.
# Was:
data %>% select(is.factor)

# Now:
data %>% select(where(is.factor))

# month, type and region all quite strong

# Residuals against the numeric predictors. The predicate is wrapped in
# where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()
Warning: Use of bare predicate functions was deprecated in tidyselect 1.1.0.
Please use wrap predicates in `where()` instead.
# Was:
data %>% select(is.numeric)

# Now:
data %>% select(where(is.numeric))

# Average price against x4046 volume in the training data, coloured by type,
# with one straight-line fit per type and no confidence band.
ggplot(train, aes(x = x4046, y = average_price, colour = type)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Filter by type to check whether price is correlated with different things
# for conventional and for organic avocados.
# Predicates are wrapped in where() because bare predicates are deprecated
# since tidyselect 1.1.0.
# NOTE(review): these plots use `avocados`, which still contains the rows
# held out in `test`; consider exploring `train` only.

# Conventional: price against the factor columns.
avocados %>%
  filter(type == "conventional") %>%
  dplyr::select(average_price, where(is.factor)) %>%
  ggpairs()


# Conventional: all numeric columns.
avocados %>%
  filter(type == "conventional") %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# Organic: price against the factor columns.
avocados %>%
  filter(type == "organic") %>%
  dplyr::select(average_price, where(is.factor)) %>%
  ggpairs()


# Organic: all numeric columns.
avocados %>%
  filter(type == "organic") %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# Total volume and total bags look strongest among the numeric columns,
# but type goes into the model first.

lm2 <- lm(formula = average_price ~ type, data = train)

summary(lm2)

Call:
lm(formula = average_price ~ type, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.70986 -0.16986 -0.01986  0.15490  0.97014 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.125101   0.007699  146.13   <2e-16 ***
typeorganic 0.484760   0.010909   44.44   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2537 on 2162 degrees of freedom
Multiple R-squared:  0.4774,    Adjusted R-squared:  0.4771 
F-statistic:  1975 on 1 and 2162 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for the type-only model.
plot(lm2)

NA
NA
# Residuals of lm2 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm2) %>%
  dplyr::select(-average_price, -type) %>%
  dplyr::select(resid, everything())


# Residuals against the remaining factor columns. Predicates are wrapped in
# where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# Residuals against the remaining numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Next model: add region on top of type.
lm3 <- lm(average_price ~ type + region, train)

summary(lm3)

Call:
lm(formula = average_price ~ type + region, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.68255 -0.13691 -0.02448  0.10986  1.00801 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.1669791  0.0141303  82.587  < 2e-16 ***
typeorganic         0.4830186  0.0092509  52.213  < 2e-16 ***
regionGreatLakes   -0.0674479  0.0186557  -3.615 0.000307 ***
regionMidsouth      0.0001922  0.0186725   0.010 0.991786    
regionNortheast     0.1899355  0.0188109  10.097  < 2e-16 ***
regionPlains        0.0287362  0.0185436   1.550 0.121370    
regionSouthCentral -0.3124954  0.0186231 -16.780  < 2e-16 ***
regionSoutheast    -0.0152272  0.0187933  -0.810 0.417888    
regionWest         -0.1380063  0.0185090  -7.456 1.28e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2151 on 2155 degrees of freedom
Multiple R-squared:  0.6256,    Adjusted R-squared:  0.6242 
F-statistic: 450.2 on 8 and 2155 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for the type + region model.
plot(lm3)


# Compare the nested models: type only versus type + region.
anova(lm2, lm3)
Analysis of Variance Table

Model 1: average_price ~ type
Model 2: average_price ~ type + region
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1   2162 139.19                                  
2   2155  99.70  7    39.485 121.92 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Residuals of lm3 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm3) %>%
  dplyr::select(-average_price, -type, -region) %>%
  dplyr::select(resid, everything())


# Residuals against the remaining factor columns. Predicates are wrapped in
# where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# Residuals against the remaining numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Next model: add month on top of type and region.
lm4 <- lm(average_price ~ type + region + month, train)

summary(lm4)

Call:
lm(formula = average_price ~ type + region + month, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.54688 -0.12102 -0.00375  0.10678  0.86726 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.076014   0.017246  62.392  < 2e-16 ***
typeorganic         0.483058   0.008102  59.623  < 2e-16 ***
regionGreatLakes   -0.066288   0.016345  -4.056 5.18e-05 ***
regionMidsouth     -0.003844   0.016356  -0.235 0.814221    
regionNortheast     0.191719   0.016475  11.637  < 2e-16 ***
regionPlains        0.030713   0.016244   1.891 0.058799 .  
regionSouthCentral -0.310548   0.016311 -19.040  < 2e-16 ***
regionSoutheast    -0.013073   0.016469  -0.794 0.427402    
regionWest         -0.134413   0.016212  -8.291  < 2e-16 ***
month2             -0.050981   0.018253  -2.793 0.005269 ** 
month3              0.011650   0.018077   0.644 0.519339    
month4              0.035496   0.018890   1.879 0.060375 .  
month5              0.021035   0.018949   1.110 0.267086    
month6              0.075376   0.019690   3.828 0.000133 ***
month7              0.147845   0.018858   7.840 7.04e-15 ***
month8              0.218080   0.019229  11.341  < 2e-16 ***
month9              0.263865   0.019757  13.356  < 2e-16 ***
month10             0.266870   0.018916  14.108  < 2e-16 ***
month11             0.154964   0.019163   8.087 1.01e-15 ***
month12             0.018761   0.019291   0.972 0.330913    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1883 on 2144 degrees of freedom
Multiple R-squared:  0.7144,    Adjusted R-squared:  0.7119 
F-statistic: 282.3 on 19 and 2144 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for the type + region + month model.
plot(lm4)


# Compare the nested models: type + region versus type + region + month.
anova(lm3, lm4)
Analysis of Variance Table

Model 1: average_price ~ type + region
Model 2: average_price ~ type + region + month
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1   2155 99.700                                  
2   2144 76.047 11    23.653 60.622 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Taking the log of the response helps the diagnostics.
# This refit overwrites the untransformed lm4 fitted earlier.

lm4 <- lm(
  formula = log(average_price) ~ type + region + month,
  data = train
)

summary(lm4)

Call:
lm(formula = log(average_price) ~ type + region + month, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.48864 -0.09065 -0.00079  0.08789  0.45193 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.054821   0.012618   4.345 1.46e-05 ***
typeorganic         0.363732   0.005928  61.359  < 2e-16 ***
regionGreatLakes   -0.027175   0.011959  -2.272 0.023161 *  
regionMidsouth      0.018802   0.011967   1.571 0.116300    
regionNortheast     0.146238   0.012054  12.132  < 2e-16 ***
regionPlains        0.034755   0.011885   2.924 0.003490 ** 
regionSouthCentral -0.245445   0.011934 -20.567  < 2e-16 ***
regionSoutheast     0.002760   0.012050   0.229 0.818870    
regionWest         -0.106270   0.011862  -8.959  < 2e-16 ***
month2             -0.046976   0.013356  -3.517 0.000445 ***
month3              0.015634   0.013226   1.182 0.237333    
month4              0.027947   0.013822   2.022 0.043303 *  
month5              0.012461   0.013864   0.899 0.368874    
month6              0.061402   0.014407   4.262 2.11e-05 ***
month7              0.114630   0.013798   8.308  < 2e-16 ***
month8              0.156065   0.014069  11.093  < 2e-16 ***
month9              0.182715   0.014456  12.640  < 2e-16 ***
month10             0.193309   0.013840  13.967  < 2e-16 ***
month11             0.117171   0.014021   8.357  < 2e-16 ***
month12             0.016215   0.014115   1.149 0.250778    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1378 on 2144 degrees of freedom
Multiple R-squared:  0.729, Adjusted R-squared:  0.7266 
F-statistic: 303.6 on 19 and 2144 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for the log-price lm4.
plot(lm4)

NA

# Residuals of the log-price lm4 alongside the predictors that are not in
# the model yet, with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm4) %>%
  dplyr::select(-average_price, -type, -region, -month) %>%
  dplyr::select(resid, everything())


# Residuals against the remaining factor columns. Predicates are wrapped in
# where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# Residuals against the remaining numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()


# Next model: add year.
lm5 <- lm(log(average_price) ~ type + region + month + year, train)

summary(lm5)

Call:
lm(formula = log(average_price) ~ type + region + month + year, 
    data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.50737 -0.07874  0.00299  0.08753  0.39464 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         0.014160   0.012693   1.116 0.264752    
typeorganic         0.364207   0.005467  66.617  < 2e-16 ***
regionGreatLakes   -0.026025   0.011030  -2.360 0.018386 *  
regionMidsouth      0.021443   0.011038   1.943 0.052180 .  
regionNortheast     0.145697   0.011117  13.106  < 2e-16 ***
regionPlains        0.033353   0.010961   3.043 0.002373 ** 
regionSouthCentral -0.245515   0.011006 -22.308  < 2e-16 ***
regionSoutheast     0.003288   0.011113   0.296 0.767358    
regionWest         -0.106062   0.010939  -9.695  < 2e-16 ***
month2             -0.045500   0.012323  -3.692 0.000228 ***
month3              0.021422   0.012213   1.754 0.079571 .  
month4              0.039185   0.013006   3.013 0.002619 ** 
month5              0.038088   0.013063   2.916 0.003585 ** 
month6              0.079723   0.013544   5.886 4.58e-09 ***
month7              0.134623   0.012989  10.364  < 2e-16 ***
month8              0.173161   0.013243  13.075  < 2e-16 ***
month9              0.201671   0.013583  14.847  < 2e-16 ***
month10             0.208536   0.013023  16.013  < 2e-16 ***
month11             0.138760   0.013205  10.508  < 2e-16 ***
month12             0.027549   0.013273   2.076 0.038051 *  
year2016           -0.028764   0.006968  -4.128 3.80e-05 ***
year2017            0.095170   0.006953  13.687  < 2e-16 ***
year2018            0.088329   0.012722   6.943 5.08e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1271 on 2141 degrees of freedom
Multiple R-squared:  0.7698,    Adjusted R-squared:  0.7675 
F-statistic: 325.5 on 22 and 2141 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for the model that adds year.
plot(lm5)


# Compare the nested log-price models: without year versus with year.
anova(lm4, lm5)
Analysis of Variance Table

Model 1: log(average_price) ~ type + region + month
Model 2: log(average_price) ~ type + region + month + year
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1   2144 40.711                                  
2   2141 34.576  3    6.1356 126.64 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Residuals of lm5 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm5) %>%
  dplyr::select(-average_price, -type, -region, -month, -year) %>%
  dplyr::select(resid, everything())


# Predicates are wrapped in where() because bare predicates are deprecated
# since tidyselect 1.1.0.
# NOTE(review): type, region, month and year have all been dropped above, so
# this selection appears to contain only `resid`; the plot is probably
# redundant at this step - confirm and remove.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# Residuals against the remaining numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Next model: add x4046.
lm6 <- lm(log(average_price) ~ type + region + month + year + x4046, train)

summary(lm6)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.48464 -0.07297  0.00623  0.08268  0.37507 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         1.529e-01  1.505e-02  10.163  < 2e-16 ***
typeorganic         2.596e-01  8.549e-03  30.370  < 2e-16 ***
regionGreatLakes   -8.908e-02  1.124e-02  -7.925 3.64e-15 ***
regionMidsouth     -3.716e-02  1.115e-02  -3.334 0.000870 ***
regionNortheast     7.083e-02  1.162e-02   6.097 1.28e-09 ***
regionPlains       -2.063e-02  1.098e-02  -1.879 0.060387 .  
regionSouthCentral -2.137e-01  1.065e-02 -20.075  < 2e-16 ***
regionSoutheast    -9.302e-03  1.058e-02  -0.879 0.379354    
regionWest         -1.056e-01  1.038e-02 -10.174  < 2e-16 ***
month2             -3.590e-02  1.171e-02  -3.065 0.002205 ** 
month3              1.929e-02  1.159e-02   1.664 0.096272 .  
month4              4.324e-02  1.235e-02   3.502 0.000471 ***
month5              4.414e-02  1.240e-02   3.558 0.000381 ***
month6              8.339e-02  1.286e-02   6.486 1.09e-10 ***
month7              1.356e-01  1.233e-02  11.000  < 2e-16 ***
month8              1.714e-01  1.257e-02  13.635  < 2e-16 ***
month9              1.948e-01  1.290e-02  15.104  < 2e-16 ***
month10             1.954e-01  1.239e-02  15.775  < 2e-16 ***
month11             1.247e-01  1.257e-02   9.922  < 2e-16 ***
month12             1.855e-02  1.261e-02   1.471 0.141462    
year2016           -3.463e-02  6.624e-03  -5.227 1.89e-07 ***
year2017            9.202e-02  6.602e-03  13.937  < 2e-16 ***
year2018            9.390e-02  1.208e-02   7.773 1.18e-14 ***
x4046              -7.047e-08  4.578e-09 -15.392  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1206 on 2140 degrees of freedom
Multiple R-squared:  0.7928,    Adjusted R-squared:  0.7906 
F-statistic:   356 on 23 and 2140 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for lm6.
plot(lm6)

NA

# Residuals of lm6 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm6) %>%
  dplyr::select(
    -average_price, -type, -region, -month, -year, -x4046
  ) %>%
  dplyr::select(resid, everything())


# Residuals against the remaining numeric columns. The predicate is wrapped
# in where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()




# Next model: add total_volume.
lm7 <- lm(
  log(average_price) ~ type + region + month + year + x4046 + total_volume,
  train
)

summary(lm7)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.49928 -0.06748  0.00721  0.07714  0.36006 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.840e-01  1.781e-02  15.942  < 2e-16 ***
typeorganic         1.443e-01  1.227e-02  11.758  < 2e-16 ***
regionGreatLakes   -9.998e-02  1.088e-02  -9.192  < 2e-16 ***
regionMidsouth     -6.285e-02  1.094e-02  -5.745 1.05e-08 ***
regionNortheast     8.859e-02  1.129e-02   7.844 6.81e-15 ***
regionPlains       -8.379e-02  1.170e-02  -7.161 1.10e-12 ***
regionSouthCentral -2.405e-01  1.048e-02 -22.941  < 2e-16 ***
regionSoutheast    -5.848e-02  1.092e-02  -5.358 9.33e-08 ***
regionWest         -9.657e-02  1.004e-02  -9.618  < 2e-16 ***
month2             -2.817e-02  1.131e-02  -2.490 0.012854 *  
month3              1.611e-02  1.118e-02   1.440 0.150025    
month4              4.164e-02  1.191e-02   3.496 0.000481 ***
month5              5.024e-02  1.197e-02   4.196 2.83e-05 ***
month6              8.560e-02  1.240e-02   6.902 6.73e-12 ***
month7              1.361e-01  1.189e-02  11.447  < 2e-16 ***
month8              1.658e-01  1.213e-02  13.667  < 2e-16 ***
month9              1.871e-01  1.246e-02  15.021  < 2e-16 ***
month10             1.842e-01  1.198e-02  15.369  < 2e-16 ***
month11             1.120e-01  1.216e-02   9.207  < 2e-16 ***
month12             1.354e-02  1.217e-02   1.113 0.266020    
year2016           -2.083e-02  6.482e-03  -3.214 0.001328 ** 
year2017            1.050e-01  6.450e-03  16.281  < 2e-16 ***
year2018            1.180e-01  1.181e-02   9.995  < 2e-16 ***
x4046              -1.179e-08  6.394e-09  -1.843 0.065435 .  
total_volume       -4.936e-08  3.890e-09 -12.690  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1163 on 2139 degrees of freedom
Multiple R-squared:  0.8073,    Adjusted R-squared:  0.8051 
F-statistic: 373.4 on 24 and 2139 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for lm7.
plot(lm7)

# Residuals of lm7 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm7) %>%
  dplyr::select(
    -average_price, -type, -region, -month, -year, -x4046, -total_volume
  ) %>%
  dplyr::select(resid, everything())


# Residuals against the remaining numeric columns. The predicate is wrapped
# in where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# Two candidate extensions: lm8 adds large_bags to lm7; lm8b leaves out x4046
# and uses small_bags and large_bags instead.
lm8 <- lm(
  log(average_price) ~ type + region + month + year + x4046 + total_volume +
    large_bags,
  train
)
lm8b <- lm(
  log(average_price) ~ 1 + type + year + region + month + total_volume +
    small_bags + large_bags,
  train
)

summary(lm8)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.50015 -0.06814  0.00708  0.07650  0.35764 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.908e-01  1.868e-02  15.568  < 2e-16 ***
typeorganic         1.420e-01  1.241e-02  11.443  < 2e-16 ***
regionGreatLakes   -1.039e-01  1.135e-02  -9.158  < 2e-16 ***
regionMidsouth     -6.573e-02  1.119e-02  -5.874 4.93e-09 ***
regionNortheast     8.701e-02  1.137e-02   7.655 2.90e-14 ***
regionPlains       -8.715e-02  1.202e-02  -7.251 5.76e-13 ***
regionSouthCentral -2.427e-01  1.064e-02 -22.819  < 2e-16 ***
regionSoutheast    -6.408e-02  1.184e-02  -5.412 6.94e-08 ***
regionWest         -1.037e-01  1.163e-02  -8.920  < 2e-16 ***
month2             -2.810e-02  1.131e-02  -2.484 0.013062 *  
month3              1.604e-02  1.118e-02   1.434 0.151747    
month4              4.139e-02  1.191e-02   3.475 0.000522 ***
month5              5.002e-02  1.197e-02   4.178 3.06e-05 ***
month6              8.568e-02  1.240e-02   6.909 6.42e-12 ***
month7              1.364e-01  1.189e-02  11.467  < 2e-16 ***
month8              1.660e-01  1.213e-02  13.683  < 2e-16 ***
month9              1.869e-01  1.246e-02  15.003  < 2e-16 ***
month10             1.834e-01  1.200e-02  15.284  < 2e-16 ***
month11             1.110e-01  1.219e-02   9.106  < 2e-16 ***
month12             1.282e-02  1.218e-02   1.052 0.292784    
year2016           -2.210e-02  6.564e-03  -3.367 0.000773 ***
year2017            1.032e-01  6.612e-03  15.613  < 2e-16 ***
year2018            1.152e-01  1.202e-02   9.585  < 2e-16 ***
x4046              -1.102e-08  6.424e-09  -1.715 0.086432 .  
total_volume       -5.133e-08  4.212e-09 -12.187  < 2e-16 ***
large_bags          1.897e-08  1.556e-08   1.219 0.222961    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1163 on 2138 degrees of freedom
Multiple R-squared:  0.8074,    Adjusted R-squared:  0.8052 
F-statistic: 358.6 on 25 and 2138 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for lm8.
plot(lm8)

# Coefficient table and fit statistics for the alternative model lm8b.
summary(lm8b)

Call:
lm(formula = log(average_price) ~ 1 + type + year + region + 
    month + total_volume + small_bags + large_bags, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.50016 -0.06826  0.00749  0.07718  0.36209 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.993e-01  1.855e-02  16.131  < 2e-16 ***
typeorganic         1.383e-01  1.217e-02  11.362  < 2e-16 ***
year2016           -2.909e-02  6.805e-03  -4.274 2.00e-05 ***
year2017            9.381e-02  7.048e-03  13.311  < 2e-16 ***
year2018            1.027e-01  1.243e-02   8.262 2.47e-16 ***
regionGreatLakes   -9.767e-02  1.120e-02  -8.722  < 2e-16 ***
regionMidsouth     -6.515e-02  1.113e-02  -5.856 5.47e-09 ***
regionNortheast     9.347e-02  1.057e-02   8.846  < 2e-16 ***
regionPlains       -8.692e-02  1.190e-02  -7.302 3.99e-13 ***
regionSouthCentral -2.409e-01  1.030e-02 -23.381  < 2e-16 ***
regionSoutheast    -6.196e-02  1.161e-02  -5.337 1.04e-07 ***
regionWest         -9.988e-02  1.163e-02  -8.588  < 2e-16 ***
month2             -2.775e-02  1.128e-02  -2.461   0.0140 *  
month3              1.313e-02  1.117e-02   1.176   0.2398    
month4              3.853e-02  1.188e-02   3.242   0.0012 ** 
month5              4.795e-02  1.195e-02   4.014 6.19e-05 ***
month6              8.185e-02  1.240e-02   6.603 5.07e-11 ***
month7              1.334e-01  1.188e-02  11.229  < 2e-16 ***
month8              1.635e-01  1.210e-02  13.509  < 2e-16 ***
month9              1.853e-01  1.242e-02  14.923  < 2e-16 ***
month10             1.816e-01  1.197e-02  15.176  < 2e-16 ***
month11             1.083e-01  1.216e-02   8.901  < 2e-16 ***
month12             9.409e-03  1.217e-02   0.773   0.4396    
total_volume       -6.673e-08  3.906e-09 -17.084  < 2e-16 ***
small_bags          5.189e-08  1.263e-08   4.107 4.16e-05 ***
large_bags          1.890e-08  1.545e-08   1.224   0.2213    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1159 on 2138 degrees of freedom
Multiple R-squared:  0.8087,    Adjusted R-squared:  0.8064 
F-statistic: 361.5 on 25 and 2138 DF,  p-value: < 2.2e-16
# Base-R diagnostic plots for lm8b.
plot(lm8b)

# Residuals of lm8 alongside the predictors that are not in the model yet,
# with `resid` moved to the first column.
avocados_resid <- train %>%
  add_residuals(lm8) %>%
  dplyr::select(
    -average_price, -type, -region, -month, -year,
    -x4046, -total_volume, -large_bags
  ) %>%
  dplyr::select(resid, everything())


# Residuals against the remaining numeric columns. The predicate is wrapped
# in where() because bare predicates are deprecated since tidyselect 1.1.0.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# NOTE(review): this formula is identical to lm8's, so lm9 is a refit of the
# same model - confirm whether another predictor was meant to be added here.
lm9 <- lm(
  log(average_price) ~ type + region + month + year + x4046 + total_volume +
    large_bags,
  train
)

summary(lm9)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.50015 -0.06814  0.00708  0.07650  0.35764 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.908e-01  1.868e-02  15.568  < 2e-16 ***
typeorganic         1.420e-01  1.241e-02  11.443  < 2e-16 ***
regionGreatLakes   -1.039e-01  1.135e-02  -9.158  < 2e-16 ***
regionMidsouth     -6.573e-02  1.119e-02  -5.874 4.93e-09 ***
regionNortheast     8.701e-02  1.137e-02   7.655 2.90e-14 ***
regionPlains       -8.715e-02  1.202e-02  -7.251 5.76e-13 ***
regionSouthCentral -2.427e-01  1.064e-02 -22.819  < 2e-16 ***
regionSoutheast    -6.408e-02  1.184e-02  -5.412 6.94e-08 ***
regionWest         -1.037e-01  1.163e-02  -8.920  < 2e-16 ***
month2             -2.810e-02  1.131e-02  -2.484 0.013062 *  
month3              1.604e-02  1.118e-02   1.434 0.151747    
month4              4.139e-02  1.191e-02   3.475 0.000522 ***
month5              5.002e-02  1.197e-02   4.178 3.06e-05 ***
month6              8.568e-02  1.240e-02   6.909 6.42e-12 ***
month7              1.364e-01  1.189e-02  11.467  < 2e-16 ***
month8              1.660e-01  1.213e-02  13.683  < 2e-16 ***
month9              1.869e-01  1.246e-02  15.003  < 2e-16 ***
month10             1.834e-01  1.200e-02  15.284  < 2e-16 ***
month11             1.110e-01  1.219e-02   9.106  < 2e-16 ***
month12             1.282e-02  1.218e-02   1.052 0.292784    
year2016           -2.210e-02  6.564e-03  -3.367 0.000773 ***
year2017            1.032e-01  6.612e-03  15.613  < 2e-16 ***
year2018            1.152e-01  1.202e-02   9.585  < 2e-16 ***
x4046              -1.102e-08  6.424e-09  -1.715 0.086432 .  
total_volume       -5.133e-08  4.212e-09 -12.187  < 2e-16 ***
large_bags          1.897e-08  1.556e-08   1.219 0.222961    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1163 on 2138 degrees of freedom
Multiple R-squared:  0.8074,    Adjusted R-squared:  0.8052 
F-statistic: 358.6 on 25 and 2138 DF,  p-value: < 2.2e-16
# Diagnostic plots for the previous model (lm9).
plot(lm9)

# lm10: same main effects as lm9 plus a type-by-total_volume interaction,
# fitted to log(average_price) on the training data.
lm10 <- lm(
  log(average_price) ~
    type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume,
  data = train
)

summary(lm10)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags + type:total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.40965 -0.06135  0.00169  0.06239  0.66682 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               3.951e-01  1.641e-02  24.086  < 2e-16 ***
typeorganic               2.021e-01  1.083e-02  18.663  < 2e-16 ***
regionGreatLakes         -1.327e-01  9.760e-03 -13.594  < 2e-16 ***
regionMidsouth           -1.250e-01  9.802e-03 -12.753  < 2e-16 ***
regionNortheast           8.775e-02  9.721e-03   9.026  < 2e-16 ***
regionPlains             -2.177e-01  1.129e-02 -19.293  < 2e-16 ***
regionSouthCentral       -2.844e-01  9.219e-03 -30.854  < 2e-16 ***
regionSoutheast          -1.638e-01  1.073e-02 -15.262  < 2e-16 ***
regionWest               -2.152e-02  1.037e-02  -2.076 0.038055 *  
month2                   -6.027e-03  9.708e-03  -0.621 0.534790    
month3                    3.897e-02  9.600e-03   4.059 5.10e-05 ***
month4                    7.302e-02  1.025e-02   7.124 1.42e-12 ***
month5                    9.117e-02  1.035e-02   8.812  < 2e-16 ***
month6                    1.098e-01  1.064e-02  10.316  < 2e-16 ***
month7                    1.510e-01  1.018e-02  14.823  < 2e-16 ***
month8                    1.730e-01  1.038e-02  16.667  < 2e-16 ***
month9                    1.893e-01  1.065e-02  17.768  < 2e-16 ***
month10                   1.768e-01  1.026e-02  17.227  < 2e-16 ***
month11                   1.034e-01  1.043e-02   9.919  < 2e-16 ***
month12                   2.169e-02  1.042e-02   2.081 0.037565 *  
year2016                  2.092e-02  5.820e-03   3.595 0.000332 ***
year2017                  1.678e-01  6.106e-03  27.475  < 2e-16 ***
year2018                  2.330e-01  1.111e-02  20.977  < 2e-16 ***
x4046                     2.568e-08  5.648e-09   4.547 5.74e-06 ***
total_volume             -9.083e-08  3.868e-09 -23.481  < 2e-16 ***
large_bags               -3.017e-08  1.342e-08  -2.248 0.024694 *  
typeorganic:total_volume -1.488e-06  5.309e-08 -28.032  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09949 on 2137 degrees of freedom
Multiple R-squared:  0.8592,    Adjusted R-squared:  0.8575 
F-statistic: 501.6 on 26 and 2137 DF,  p-value: < 2.2e-16
# Diagnostic plots for lm10.
plot(lm10)

# lm11: lm10 plus a type-by-region interaction.
lm11 <- lm(
  log(average_price) ~
    type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume + type:region,
  data = train
)

# lm12: lm11 plus type-by-year and region-by-total_volume interactions.
lm12 <- lm(
  log(average_price) ~
    type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume + type:region +
    type:year + region:total_volume,
  data = train
)

summary(lm11)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags + type:total_volume + type:region, 
    data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.40733 -0.05652  0.00533  0.05971  0.71079 

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                     5.697e-01  2.595e-02  21.956  < 2e-16 ***
typeorganic                     2.742e-02  2.660e-02   1.031 0.302616    
regionGreatLakes               -2.170e-01  1.798e-02 -12.070  < 2e-16 ***
regionMidsouth                 -2.286e-01  1.778e-02 -12.852  < 2e-16 ***
regionNortheast                 6.700e-03  2.023e-02   0.331 0.740587    
regionPlains                   -3.982e-01  2.004e-02 -19.872  < 2e-16 ***
regionSouthCentral             -2.632e-01  1.549e-02 -16.988  < 2e-16 ***
regionSoutheast                -2.212e-01  1.897e-02 -11.660  < 2e-16 ***
regionWest                     -9.722e-02  1.625e-02  -5.983 2.56e-09 ***
month2                          1.717e-03  9.194e-03   0.187 0.851907    
month3                          3.929e-02  9.064e-03   4.334 1.53e-05 ***
month4                          7.673e-02  9.686e-03   7.922 3.72e-15 ***
month5                          1.005e-01  9.797e-03  10.261  < 2e-16 ***
month6                          1.165e-01  1.007e-02  11.569  < 2e-16 ***
month7                          1.506e-01  9.611e-03  15.665  < 2e-16 ***
month8                          1.707e-01  9.803e-03  17.411  < 2e-16 ***
month9                          1.838e-01  1.009e-02  18.215  < 2e-16 ***
month10                         1.622e-01  9.804e-03  16.539  < 2e-16 ***
month11                         9.294e-02  9.927e-03   9.362  < 2e-16 ***
month12                         1.435e-02  9.870e-03   1.454 0.146176    
year2016                        2.238e-02  5.545e-03   4.036 5.64e-05 ***
year2017                        1.694e-01  5.822e-03  29.092  < 2e-16 ***
year2018                        2.431e-01  1.056e-02  23.020  < 2e-16 ***
x4046                           9.954e-09  1.040e-08   0.957 0.338432    
total_volume                   -1.128e-07  5.639e-09 -20.004  < 2e-16 ***
large_bags                      1.579e-08  1.667e-08   0.947 0.343529    
typeorganic:total_volume       -1.547e-06  5.493e-08 -28.162  < 2e-16 ***
typeorganic:regionGreatLakes    7.198e-02  2.114e-02   3.405 0.000675 ***
typeorganic:regionMidsouth      1.084e-01  2.108e-02   5.142 2.97e-07 ***
typeorganic:regionNortheast     8.498e-02  2.317e-02   3.668 0.000251 ***
typeorganic:regionPlains        2.464e-01  2.329e-02  10.581  < 2e-16 ***
typeorganic:regionSouthCentral -4.427e-02  1.942e-02  -2.279 0.022747 *  
typeorganic:regionSoutheast     3.479e-02  2.255e-02   1.543 0.123010    
typeorganic:regionWest          1.267e-01  1.948e-02   6.503 9.78e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0938 on 2130 degrees of freedom
Multiple R-squared:  0.8753,    Adjusted R-squared:  0.8733 
F-statistic: 452.9 on 33 and 2130 DF,  p-value: < 2.2e-16
# Diagnostic plots for lm11.
plot(lm11)

# One-row model summaries for the two interaction models.
glance(lm11)
glance(lm12)

# Root-mean-squared residual of lm12 on the training data. The residuals are
# those of the fitted response, log(average_price).
train_rmse <- train %>%
  add_residuals(lm12) %>%
  summarise(rmse = mean(resid^2)^0.5) %>%
  pull(rmse)

train_rmse
[1] 0.08717599
# Score lm12 on the held-out test set, keeping the observed price next to the
# model's prediction and residual.
# NOTE(review): lm12's response is log(average_price), so pred and resid are
# presumably on the log scale while average_price is not -- confirm before
# comparing the columns directly.
predictions_test <- test %>%
  add_predictions(lm12) %>%
  add_residuals(lm12) %>%
  dplyr::select(average_price, pred, resid)

# Root-mean-squared residual on the test set (name kept as in the original).
test_rsme <- predictions_test %>%
  summarise(rmse = mean(resid^2)^0.5) %>%
  pull(rmse)

# Ratio of test to training error; a value near 1 indicates similar fit on
# both sets.
test_rsme / train_rmse
[1] 1.017817
# 10-fold cross-validation settings; hold-out predictions are kept.
cv_10_fold <- trainControl(
  method = "cv",
  number = 10,
  savePredictions = TRUE
)

# train() draws the fold assignment from the RNG, so seed it first; without
# this the averaged metrics below change on every run. Use the same seed
# before any other train() call whose resampling results are compared with
# this one, so that both are scored on identical folds.
set.seed(2023)

# Cross-validate the lm10 specification (main effects + type:total_volume)
# on the full avocados data.
model2 <- train(
  log(average_price) ~
    type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume,
  data = avocados,
  trControl = cv_10_fold,
  method = "lm"
)

# Average R-squared and RMSE across the 10 folds.
model2$resample %>%
  summarise(av_r2 = mean(Rsquared),
            av_rmse = mean(RMSE))
# Reuses cv_10_fold (10-fold CV, savePredictions = TRUE) created for the
# previous cross-validation run; the settings here were an identical copy.

# train() draws the fold assignment from the RNG, so seed it first; without
# this the averaged metrics below change on every run. Use the same seed
# before any other train() call whose resampling results are compared with
# this one, so that both are scored on identical folds.
set.seed(2023)

# Cross-validate the lm12 specification (adds type:region, type:year and
# region:total_volume) on the full avocados data.
# Note: this overwrites the model2 object from the previous run.
model2 <- train(
  log(average_price) ~
    type + region + month + year +
    x4046 + total_volume + large_bags +
    type:total_volume + type:region +
    type:year + region:total_volume,
  data = avocados,
  trControl = cv_10_fold,
  method = "lm"
)

# Average R-squared and RMSE across the 10 folds.
model2$resample %>%
  summarise(av_r2 = mean(Rsquared),
            av_rmse = mean(RMSE))
# Relative importance of the lm9 predictors using the "lmg" metric;
# rela = TRUE normalises the metrics so they sum to 100%.
calc.relimp(
  lm9,
  type = "lmg",
  rela = TRUE
)
Response variable: log(average_price) 
Total response variance: 0.0694549 
Analysis based on 2164 observations 

25 Regressors: 
Some regressors combined in groups: 
        Group  region : regionGreatLakes regionMidsouth regionNortheast regionPlains regionSouthCentral regionSoutheast regionWest 
        Group  month : month2 month3 month4 month5 month6 month7 month8 month9 month10 month11 month12 
        Group  year : year2016 year2017 year2018 

 Relative importance of 7 (groups of) regressors assessed: 
 region month year type x4046 total_volume large_bags 
 
Proportion of variance explained by model: 80.74%
Metrics are normalized to sum to 100% (rela=TRUE). 

Relative importance metrics: 

                    lmg
region       0.13937651
month        0.08720089
year         0.05339973
type         0.20057461
x4046        0.21021625
total_volume 0.24157541
large_bags   0.06765660

Average coefficients for different model sizes: 

                          1group       2groups       3groups       4groups       5groups       6groups       7groups
type                3.650626e-01  2.818672e-01  2.268670e-01  1.923404e-01  1.708109e-01  1.557828e-01  1.420239e-01
regionGreatLakes   -3.105644e-02 -6.536054e-02 -9.093596e-02 -1.060173e-01 -1.109577e-01 -1.086055e-01 -1.039330e-01
regionMidsouth      2.355831e-02 -2.252880e-02 -5.146003e-02 -6.662130e-02 -7.149307e-02 -6.993080e-02 -6.573028e-02
regionNortheast     1.455191e-01  1.028825e-01  7.976257e-02  7.198282e-02  7.456472e-02  8.161071e-02  8.701139e-02
regionPlains        2.070846e-02 -2.855559e-02 -6.105776e-02 -8.005414e-02 -8.894825e-02 -9.069393e-02 -8.714929e-02
regionSouthCentral -2.536335e-01 -2.280866e-01 -2.218658e-01 -2.263421e-01 -2.342857e-01 -2.405727e-01 -2.427310e-01
regionSoutheast     3.531651e-03 -5.199057e-03 -2.416280e-02 -4.418394e-02 -5.879663e-02 -6.513071e-02 -6.407696e-02
regionWest         -1.085448e-01 -7.199524e-02 -7.201130e-02 -8.630120e-02 -9.923357e-02 -1.040612e-01 -1.037239e-01
month2             -5.186263e-02 -3.902048e-02 -3.161419e-02 -2.844915e-02 -2.789532e-02 -2.815423e-02 -2.810317e-02
month3              1.695275e-02  1.387872e-02  1.331812e-02  1.377802e-02  1.445124e-02  1.523128e-02  1.603579e-02
month4              2.661084e-02  2.808541e-02  3.189449e-02  3.549137e-02  3.784346e-02  3.968660e-02  4.138582e-02
month5              8.649173e-03  1.885438e-02  2.869720e-02  3.626468e-02  4.127649e-02  4.559877e-02  5.002444e-02
month6              4.495922e-02  5.462191e-02  6.464176e-02  7.263623e-02  7.786383e-02  8.187851e-02  8.568132e-02
month7              1.092868e-01  1.106613e-01  1.163355e-01  1.227247e-01  1.278965e-01  1.322363e-01  1.363588e-01
month8              1.587040e-01  1.504023e-01  1.509025e-01  1.549697e-01  1.591645e-01  1.628472e-01  1.659996e-01
month9              1.794670e-01  1.713216e-01  1.709562e-01  1.742895e-01  1.785286e-01  1.829385e-01  1.868981e-01
month10             1.925655e-01  1.803850e-01  1.756806e-01  1.754526e-01  1.774417e-01  1.806015e-01  1.833873e-01
month11             1.160070e-01  1.026353e-01  9.823998e-02  9.898124e-02  1.021506e-01  1.067620e-01  1.109910e-01
month12             1.376898e-02  6.654850e-03  4.376123e-03  4.940819e-03  6.959231e-03  1.001879e-02  1.282026e-02
year2016           -3.579802e-02 -2.461379e-02 -2.161431e-02 -2.253051e-02 -2.362133e-02 -2.302443e-02 -2.210155e-02
year2017            8.610992e-02  1.014739e-01  1.062426e-01  1.053060e-01  1.033841e-01  1.031367e-01  1.032399e-01
year2018           -7.601043e-03  4.083933e-02  6.759340e-02  8.145847e-02  9.148388e-02  1.032011e-01  1.152293e-01
x4046              -1.813739e-07 -1.520503e-07 -1.205773e-07 -8.851581e-08 -5.791840e-08 -3.127724e-08 -1.101968e-08
total_volume       -8.316772e-08 -7.559854e-08 -6.875720e-08 -6.279969e-08 -5.790218e-08 -5.412286e-08 -5.133306e-08
large_bags         -4.656543e-07 -2.851997e-07 -1.487453e-07 -5.749024e-08 -7.636612e-09  1.164011e-08  1.897008e-08

AUTOMATED


#### GARBAGE

# Forward subset selection over every other column of avocados, considering
# models of up to 40 terms.
# NOTE(review): this uses raw average_price and the full avocados data,
# whereas the hand-built models use log(average_price) on train -- confirm
# this difference is intended.
regsubset_forwards <- regsubsets(
  average_price ~ .,
  data = avocados,
  nvmax = 40,
  method = "forward"
)

summary(regsubset_forwards)
Subset selection object
Call: regsubsets.formula(average_price ~ ., data = avocados, nvmax = 40, 
    method = "forward")
28 Variables  (and intercept)
                   Forced in Forced out
total_volume           FALSE      FALSE
x4046                  FALSE      FALSE
x4225                  FALSE      FALSE
x4770                  FALSE      FALSE
small_bags             FALSE      FALSE
large_bags             FALSE      FALSE
typeorganic            FALSE      FALSE
year2016               FALSE      FALSE
year2017               FALSE      FALSE
year2018               FALSE      FALSE
regionGreatLakes       FALSE      FALSE
regionMidsouth         FALSE      FALSE
regionNortheast        FALSE      FALSE
regionPlains           FALSE      FALSE
regionSouthCentral     FALSE      FALSE
regionSoutheast        FALSE      FALSE
regionWest             FALSE      FALSE
month2                 FALSE      FALSE
month3                 FALSE      FALSE
month4                 FALSE      FALSE
month5                 FALSE      FALSE
month6                 FALSE      FALSE
month7                 FALSE      FALSE
month8                 FALSE      FALSE
month9                 FALSE      FALSE
month10                FALSE      FALSE
month11                FALSE      FALSE
month12                FALSE      FALSE
1 subsets of each size up to 28
Selection Algorithm: forward
          total_volume x4046 x4225 x4770 small_bags large_bags typeorganic year2016 year2017 year2018
1  ( 1 )  "*"          " "   " "   " "   " "        " "        " "         " "      " "      " "     
2  ( 1 )  "*"          " "   " "   " "   " "        " "        " "         " "      " "      " "     
3  ( 1 )  "*"          " "   " "   " "   " "        " "        " "         " "      "*"      " "     
4  ( 1 )  "*"          " "   " "   " "   " "        " "        " "         " "      "*"      " "     
5  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      " "     
6  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      " "     
7  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      " "     
8  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      " "     
9  ( 1 )  "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      " "     
10  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      " "     
11  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      "*"     
12  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      "*"     
13  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      "*"     
14  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      "*"     
15  ( 1 ) "*"          " "   " "   " "   " "        " "        "*"         " "      "*"      "*"     
16  ( 1 ) "*"          " "   " "   "*"   " "        " "        "*"         " "      "*"      "*"     
17  ( 1 ) "*"          " "   " "   "*"   " "        " "        "*"         "*"      "*"      "*"     
18  ( 1 ) "*"          " "   "*"   "*"   " "        " "        "*"         "*"      "*"      "*"     
19  ( 1 ) "*"          " "   "*"   "*"   " "        " "        "*"         "*"      "*"      "*"     
20  ( 1 ) "*"          " "   "*"   "*"   " "        " "        "*"         "*"      "*"      "*"     
21  ( 1 ) "*"          " "   "*"   "*"   "*"        " "        "*"         "*"      "*"      "*"     
22  ( 1 ) "*"          " "   "*"   "*"   "*"        " "        "*"         "*"      "*"      "*"     
23  ( 1 ) "*"          " "   "*"   "*"   "*"        " "        "*"         "*"      "*"      "*"     
24  ( 1 ) "*"          " "   "*"   "*"   "*"        " "        "*"         "*"      "*"      "*"     
25  ( 1 ) "*"          " "   "*"   "*"   "*"        " "        "*"         "*"      "*"      "*"     
26  ( 1 ) "*"          "*"   "*"   "*"   "*"        " "        "*"         "*"      "*"      "*"     
27  ( 1 ) "*"          "*"   "*"   "*"   "*"        "*"        "*"         "*"      "*"      "*"     
28  ( 1 ) "*"          "*"   "*"   "*"   "*"        "*"        "*"         "*"      "*"      "*"     
          regionGreatLakes regionMidsouth regionNortheast regionPlains regionSouthCentral regionSoutheast regionWest
1  ( 1 )  " "              " "            " "             " "          " "                " "             " "       
2  ( 1 )  " "              " "            "*"             " "          " "                " "             " "       
3  ( 1 )  " "              " "            "*"             " "          " "                " "             " "       
4  ( 1 )  " "              " "            "*"             " "          "*"                " "             " "       
5  ( 1 )  " "              " "            "*"             " "          "*"                " "             " "       
6  ( 1 )  " "              " "            "*"             " "          "*"                " "             " "       
7  ( 1 )  " "              " "            "*"             " "          "*"                " "             " "       
8  ( 1 )  " "              " "            "*"             " "          "*"                " "             " "       
9  ( 1 )  " "              " "            "*"             " "          "*"                " "             " "       
10  ( 1 ) " "              " "            "*"             " "          "*"                " "             " "       
11  ( 1 ) " "              " "            "*"             " "          "*"                " "             " "       
12  ( 1 ) " "              " "            "*"             " "          "*"                " "             " "       
13  ( 1 ) "*"              " "            "*"             " "          "*"                " "             " "       
14  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
15  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
16  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
17  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
18  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
19  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
20  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
21  ( 1 ) "*"              " "            "*"             " "          "*"                " "             "*"       
22  ( 1 ) "*"              "*"            "*"             " "          "*"                " "             "*"       
23  ( 1 ) "*"              "*"            "*"             "*"          "*"                " "             "*"       
24  ( 1 ) "*"              "*"            "*"             "*"          "*"                "*"             "*"       
25  ( 1 ) "*"              "*"            "*"             "*"          "*"                "*"             "*"       
26  ( 1 ) "*"              "*"            "*"             "*"          "*"                "*"             "*"       
27  ( 1 ) "*"              "*"            "*"             "*"          "*"                "*"             "*"       
28  ( 1 ) "*"              "*"            "*"             "*"          "*"                "*"             "*"       
          month2 month3 month4 month5 month6 month7 month8 month9 month10 month11 month12
1  ( 1 )  " "    " "    " "    " "    " "    " "    " "    " "    " "     " "     " "    
2  ( 1 )  " "    " "    " "    " "    " "    " "    " "    " "    " "     " "     " "    
3  ( 1 )  " "    " "    " "    " "    " "    " "    " "    " "    " "     " "     " "    
4  ( 1 )  " "    " "    " "    " "    " "    " "    " "    " "    " "     " "     " "    
5  ( 1 )  " "    " "    " "    " "    " "    " "    " "    " "    " "     " "     " "    
6  ( 1 )  " "    " "    " "    " "    " "    " "    " "    " "    "*"     " "     " "    
7  ( 1 )  " "    " "    " "    " "    " "    " "    " "    "*"    "*"     " "     " "    
8  ( 1 )  " "    " "    " "    " "    " "    " "    "*"    "*"    "*"     " "     " "    
9  ( 1 )  " "    " "    " "    " "    " "    "*"    "*"    "*"    "*"     " "     " "    
10  ( 1 ) " "    " "    " "    " "    " "    "*"    "*"    "*"    "*"     "*"     " "    
11  ( 1 ) " "    " "    " "    " "    " "    "*"    "*"    "*"    "*"     "*"     " "    
12  ( 1 ) " "    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
13  ( 1 ) " "    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
14  ( 1 ) " "    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
15  ( 1 ) "*"    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
16  ( 1 ) "*"    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
17  ( 1 ) "*"    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
18  ( 1 ) "*"    " "    " "    " "    "*"    "*"    "*"    "*"    "*"     "*"     " "    
19  ( 1 ) "*"    " "    " "    "*"    "*"    "*"    "*"    "*"    "*"     "*"     " "    
20  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     " "    
21  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     " "    
22  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     " "    
23  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     " "    
24  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     " "    
25  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     "*"    
26  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     "*"    
27  ( 1 ) "*"    " "    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     "*"    
28  ( 1 ) "*"    "*"    "*"    "*"    "*"    "*"    "*"    "*"    "*"     "*"     "*"    
# Which terms are included at each model size, ordered by BIC.
plot(regsubset_forwards, scale = "bic")

# Keep the selection summary so its per-size statistics can be plotted.
sum_forward <- summary(regsubset_forwards)

# BIC against the index of the model in the summary, points joined by lines.
plot(sum_forward$bic, type = "b")

# glmulti: automated model search over main effects and pairwise interactions.
# With method = "d" this is only a diagnostic run that reports the size of the
# candidate set; no models are fitted.


glmulti_fit <- glmulti(
  log(average_price) ~ ., # model to fit: log(average_price) against every other column of train
  level = 2, # level = 2 means try pairwise interactions. level = 1 means main effects only
  data = train, # data to use for fitting
  minsize = 0, # min size of model to try, in number of predictors
  maxsize = -1, # max size to try, set to -1 for unlimited
  marginality = TRUE, # marginality true means include pairwise interaction only if both main effects present in model.  
  method = "d", # method "d" means trial run, to get size of problem. Set to "h" for exhaustive search, or "g" for genetic algorithm
  confsetsize = 10, # how many models should glmulti() return? Must be less than total size of problem
  plotty = FALSE, # provide progress plots? Generally annoying.
  report = TRUE, # provide progress reports? Generally useful.
  fitfunction = lm, # use lm() as fit function. Can also use glm() for logistic regression.
  crit = aic # criterion for selecting best models. 
)
Initialization...
TASK: Diagnostic of candidate set.
Sample size: 2164
4 factor(s).
7 covariate(s).
0 f exclusion(s).
0 c exclusion(s).
0 f:f exclusion(s).
0 c:c exclusion(s).
0 f:c exclusion(s).
Size constraints: min =  0 max = -1
Complexity constraints: min =  0 max = -1
Marginality rule.
Your candidate set contains more than 1 billion (1e9) models.
# Large interaction model for log(average_price) on the training data: all ten
# main effects plus 26 selected pairwise interactions.
# NOTE(review): presumably the formula returned by the glmulti search --
# confirm. The year:month interaction contains level combinations that cannot
# be estimated, so summary() reports NA coefficients for them.
lm_multi <- lm(
  log(average_price) ~ 1 +
    type + year + region + month +
    total_volume + x4046 + x4225 + x4770 + small_bags + large_bags +
    year:type + region:type + region:year +
    month:type + month:year + month:region +
    x4046:total_volume + x4225:total_volume +
    small_bags:total_volume + small_bags:x4046 +
    small_bags:x4225 + small_bags:x4770 +
    large_bags:total_volume + large_bags:small_bags +
    type:total_volume + type:x4225 +
    year:total_volume + year:x4046 + year:x4770 + year:large_bags +
    region:total_volume + region:x4225 + region:large_bags +
    month:total_volume + month:x4770 + month:small_bags,
  data = train
)

summary(lm_multi)

Call:
lm(formula = log(average_price) ~ 1 + type + year + region + 
    month + total_volume + x4046 + x4225 + x4770 + small_bags + 
    large_bags + year:type + region:type + region:year + month:type + 
    month:year + month:region + x4046:total_volume + x4225:total_volume + 
    small_bags:total_volume + small_bags:x4046 + small_bags:x4225 + 
    small_bags:x4770 + large_bags:total_volume + large_bags:small_bags + 
    type:total_volume + type:x4225 + year:total_volume + year:x4046 + 
    year:x4770 + year:large_bags + region:total_volume + region:x4225 + 
    region:large_bags + month:total_volume + month:x4770 + month:small_bags, 
    data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.25806 -0.03281  0.00058  0.03568  0.34184 

Coefficients: (9 not defined because of singularities)
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      6.255e-01  8.460e-02   7.393 2.13e-13 ***
typeorganic                     -8.939e-02  8.080e-02  -1.106 0.268751    
year2016                         2.364e-01  3.002e-02   7.874 5.71e-15 ***
year2017                         2.824e-01  3.157e-02   8.944  < 2e-16 ***
year2018                         5.397e-01  5.018e-02  10.756  < 2e-16 ***
regionGreatLakes                -9.074e-02  7.469e-02  -1.215 0.224568    
regionMidsouth                  -2.542e-01  7.638e-02  -3.328 0.000892 ***
regionNortheast                 -5.563e-02  8.342e-02  -0.667 0.504925    
regionPlains                    -1.702e-01  8.056e-02  -2.113 0.034726 *  
regionSouthCentral              -2.062e-01  7.704e-02  -2.677 0.007497 ** 
regionSoutheast                 -5.594e-02  7.423e-02  -0.754 0.451143    
regionWest                       5.367e-02  7.567e-02   0.709 0.478286    
month2                           5.724e-02  3.871e-02   1.479 0.139351    
month3                           1.440e-01  3.920e-02   3.675 0.000245 ***
month4                           1.436e-01  4.212e-02   3.410 0.000664 ***
month5                           1.214e-01  4.336e-02   2.799 0.005176 ** 
month6                           1.498e-01  4.709e-02   3.180 0.001495 ** 
month7                           1.722e-01  4.461e-02   3.860 0.000117 ***
month8                           1.950e-01  4.713e-02   4.136 3.68e-05 ***
month9                           7.187e-02  4.665e-02   1.541 0.123600    
month10                          2.078e-01  4.376e-02   4.748 2.21e-06 ***
month11                          5.381e-02  4.353e-02   1.236 0.216574    
month12                         -7.273e-02  4.378e-02  -1.661 0.096787 .  
total_volume                     1.854e-07  1.676e-07   1.106 0.268789    
x4046                           -3.609e-07  1.706e-07  -2.115 0.034527 *  
x4225                           -4.336e-07  1.732e-07  -2.504 0.012379 *  
x4770                           -2.602e-07  2.102e-07  -1.238 0.215938    
small_bags                      -1.680e-07  1.696e-07  -0.991 0.321969    
large_bags                      -1.011e-06  2.116e-07  -4.780 1.88e-06 ***
typeorganic:year2016            -2.288e-01  2.209e-02 -10.356  < 2e-16 ***
typeorganic:year2017            -2.323e-01  2.354e-02  -9.870  < 2e-16 ***
typeorganic:year2018            -2.189e-01  4.109e-02  -5.326 1.12e-07 ***
typeorganic:regionGreatLakes     3.226e-02  6.946e-02   0.464 0.642406    
typeorganic:regionMidsouth       2.561e-01  7.180e-02   3.566 0.000371 ***
typeorganic:regionNortheast      2.220e-01  7.880e-02   2.818 0.004883 ** 
typeorganic:regionPlains         2.461e-01  7.620e-02   3.229 0.001263 ** 
typeorganic:regionSouthCentral   8.328e-02  7.340e-02   1.135 0.256716    
typeorganic:regionSoutheast      1.232e-01  7.001e-02   1.759 0.078662 .  
typeorganic:regionWest           3.204e-02  7.012e-02   0.457 0.647763    
year2016:regionGreatLakes       -6.432e-02  1.707e-02  -3.767 0.000170 ***
year2017:regionGreatLakes       -3.119e-02  1.838e-02  -1.697 0.089933 .  
year2018:regionGreatLakes       -5.608e-02  3.125e-02  -1.795 0.072868 .  
year2016:regionMidsouth         -2.708e-02  1.651e-02  -1.640 0.101071    
year2017:regionMidsouth          4.999e-03  1.702e-02   0.294 0.768955    
year2018:regionMidsouth         -7.916e-02  3.113e-02  -2.543 0.011064 *  
year2016:regionNortheast         5.951e-02  1.757e-02   3.387 0.000721 ***
year2017:regionNortheast         9.892e-02  1.898e-02   5.211 2.08e-07 ***
year2018:regionNortheast         6.028e-02  3.386e-02   1.780 0.075210 .  
year2016:regionPlains           -1.365e-01  1.777e-02  -7.681 2.51e-14 ***
year2017:regionPlains           -1.580e-01  1.763e-02  -8.966  < 2e-16 ***
year2018:regionPlains           -2.518e-01  3.055e-02  -8.243 3.09e-16 ***
year2016:regionSouthCentral     -8.456e-02  1.685e-02  -5.019 5.67e-07 ***
year2017:regionSouthCentral     -7.148e-02  1.688e-02  -4.234 2.41e-05 ***
year2018:regionSouthCentral     -8.377e-02  2.987e-02  -2.804 0.005093 ** 
year2016:regionSoutheast        -1.538e-01  1.765e-02  -8.716  < 2e-16 ***
year2017:regionSoutheast        -1.358e-01  1.726e-02  -7.868 6.00e-15 ***
year2018:regionSoutheast        -2.670e-01  3.084e-02  -8.657  < 2e-16 ***
year2016:regionWest             -4.118e-02  1.835e-02  -2.244 0.024977 *  
year2017:regionWest              2.351e-02  1.921e-02   1.224 0.221023    
year2018:regionWest              2.031e-02  3.256e-02   0.624 0.532910    
typeorganic:month2              -3.672e-02  3.077e-02  -1.193 0.232840    
typeorganic:month3              -9.784e-02  3.119e-02  -3.137 0.001733 ** 
typeorganic:month4              -7.267e-02  3.374e-02  -2.154 0.031386 *  
typeorganic:month5              -6.724e-02  3.503e-02  -1.920 0.055054 .  
typeorganic:month6              -6.012e-02  3.756e-02  -1.601 0.109620    
typeorganic:month7              -4.639e-02  3.450e-02  -1.345 0.178859    
typeorganic:month8              -9.407e-03  3.588e-02  -0.262 0.793188    
typeorganic:month9               7.776e-02  3.586e-02   2.168 0.030255 *  
typeorganic:month10             -6.154e-02  3.350e-02  -1.837 0.066352 .  
typeorganic:month11              1.436e-02  3.414e-02   0.421 0.673979    
typeorganic:month12              6.533e-02  3.386e-02   1.929 0.053820 .  
year2016:month2                  8.061e-03  1.793e-02   0.450 0.653024    
year2017:month2                 -1.553e-02  1.825e-02  -0.851 0.394887    
year2018:month2                 -3.545e-02  1.906e-02  -1.860 0.063085 .  
year2016:month3                 -1.648e-03  1.733e-02  -0.095 0.924277    
year2017:month3                  7.285e-02  1.770e-02   4.114 4.04e-05 ***
year2018:month3                 -3.362e-02  1.928e-02  -1.743 0.081460 .  
year2016:month4                 -2.984e-02  1.788e-02  -1.669 0.095291 .  
year2017:month4                  1.326e-01  1.829e-02   7.249 6.06e-13 ***
year2018:month4                         NA         NA      NA       NA    
year2016:month5                 -2.726e-02  1.697e-02  -1.606 0.108426    
year2017:month5                  1.852e-01  1.903e-02   9.733  < 2e-16 ***
year2018:month5                         NA         NA      NA       NA    
year2016:month6                  1.813e-03  1.784e-02   0.102 0.919084    
year2017:month6                  1.589e-01  1.903e-02   8.352  < 2e-16 ***
year2018:month6                         NA         NA      NA       NA    
year2016:month7                  8.302e-02  1.814e-02   4.576 5.05e-06 ***
year2017:month7                  1.712e-01  1.823e-02   9.395  < 2e-16 ***
year2018:month7                         NA         NA      NA       NA    
year2016:month8                  6.478e-02  1.910e-02   3.392 0.000708 ***
year2017:month8                  2.101e-01  1.835e-02  11.453  < 2e-16 ***
year2018:month8                         NA         NA      NA       NA    
year2016:month9                  6.232e-02  2.017e-02   3.089 0.002037 ** 
year2017:month9                  2.689e-01  1.995e-02  13.478  < 2e-16 ***
year2018:month9                         NA         NA      NA       NA    
year2016:month10                 9.812e-02  1.890e-02   5.191 2.31e-07 ***
year2017:month10                 2.455e-01  1.893e-02  12.968  < 2e-16 ***
year2018:month10                        NA         NA      NA       NA    
year2016:month11                 1.926e-01  1.829e-02  10.531  < 2e-16 ***
year2017:month11                 1.938e-01  1.894e-02  10.232  < 2e-16 ***
year2018:month11                        NA         NA      NA       NA    
year2016:month12                 6.625e-02  1.841e-02   3.598 0.000328 ***
year2017:month12                 1.472e-01  1.817e-02   8.100 9.70e-16 ***
year2018:month12                        NA         NA      NA       NA    
regionGreatLakes:month2         -5.983e-02  2.639e-02  -2.268 0.023463 *  
regionMidsouth:month2           -4.433e-02  2.681e-02  -1.654 0.098383 .  
regionNortheast:month2          -9.789e-03  2.630e-02  -0.372 0.709809    
regionPlains:month2             -4.942e-02  2.792e-02  -1.770 0.076900 .  
regionSouthCentral:month2        8.683e-04  2.563e-02   0.034 0.972976    
regionSoutheast:month2          -4.947e-02  2.683e-02  -1.844 0.065389 .  
regionWest:month2                1.564e-02  2.478e-02   0.631 0.528005    
regionGreatLakes:month3         -5.716e-02  2.627e-02  -2.175 0.029715 *  
regionMidsouth:month3           -9.828e-02  2.602e-02  -3.777 0.000164 ***
regionNortheast:month3          -4.060e-02  2.515e-02  -1.614 0.106627    
regionPlains:month3             -8.950e-02  2.762e-02  -3.241 0.001213 ** 
regionSouthCentral:month3       -1.596e-02  2.432e-02  -0.656 0.511742    
regionSoutheast:month3          -7.190e-02  2.635e-02  -2.728 0.006425 ** 
regionWest:month3                1.306e-02  2.433e-02   0.537 0.591516    
regionGreatLakes:month4         -5.804e-02  2.716e-02  -2.137 0.032732 *  
regionMidsouth:month4           -7.827e-02  2.724e-02  -2.874 0.004098 ** 
regionNortheast:month4          -3.249e-02  2.719e-02  -1.195 0.232252    
regionPlains:month4             -8.549e-02  2.988e-02  -2.861 0.004270 ** 
regionSouthCentral:month4       -3.315e-02  2.682e-02  -1.236 0.216650    
regionSoutheast:month4          -6.484e-02  2.800e-02  -2.316 0.020670 *  
regionWest:month4                2.436e-02  2.623e-02   0.929 0.353097    
regionGreatLakes:month5         -4.318e-02  2.846e-02  -1.517 0.129420    
regionMidsouth:month5           -7.995e-02  2.868e-02  -2.788 0.005363 ** 
regionNortheast:month5          -1.522e-03  2.728e-02  -0.056 0.955507    
regionPlains:month5             -3.121e-02  3.027e-02  -1.031 0.302678    
regionSouthCentral:month5        8.644e-05  2.747e-02   0.003 0.997490    
regionSoutheast:month5          -4.897e-02  2.838e-02  -1.725 0.084609 .  
regionWest:month5                6.408e-02  2.571e-02   2.493 0.012762 *  
regionGreatLakes:month6         -8.487e-02  3.090e-02  -2.747 0.006079 ** 
regionMidsouth:month6           -9.893e-02  2.972e-02  -3.329 0.000889 ***
regionNortheast:month6          -3.149e-02  2.940e-02  -1.071 0.284196    
regionPlains:month6             -7.005e-02  3.290e-02  -2.129 0.033363 *  
regionSouthCentral:month6       -5.460e-02  2.846e-02  -1.919 0.055184 .  
regionSoutheast:month6          -1.059e-01  3.088e-02  -3.430 0.000616 ***
regionWest:month6                2.486e-02  2.712e-02   0.917 0.359495    
regionGreatLakes:month7         -1.023e-01  2.825e-02  -3.620 0.000302 ***
regionMidsouth:month7           -1.434e-01  2.859e-02  -5.017 5.73e-07 ***
regionNortheast:month7          -1.018e-01  2.822e-02  -3.606 0.000319 ***
regionPlains:month7             -1.122e-01  3.012e-02  -3.726 0.000200 ***
regionSouthCentral:month7       -7.391e-02  2.746e-02  -2.692 0.007174 ** 
regionSoutheast:month7          -1.532e-01  2.957e-02  -5.182 2.42e-07 ***
regionWest:month7                2.250e-02  2.613e-02   0.861 0.389365    
regionGreatLakes:month8         -1.047e-01  2.939e-02  -3.563 0.000376 ***
regionMidsouth:month8           -1.354e-01  2.888e-02  -4.690 2.93e-06 ***
regionNortheast:month8          -1.813e-01  2.916e-02  -6.217 6.19e-10 ***
regionPlains:month8             -1.270e-01  3.198e-02  -3.972 7.38e-05 ***
regionSouthCentral:month8       -6.792e-02  2.865e-02  -2.370 0.017870 *  
regionSoutheast:month8          -1.494e-01  2.930e-02  -5.100 3.74e-07 ***
regionWest:month8                7.398e-03  2.718e-02   0.272 0.785475    
regionGreatLakes:month9         -4.479e-02  2.913e-02  -1.538 0.124330    
regionMidsouth:month9           -7.889e-02  2.942e-02  -2.682 0.007391 ** 
regionNortheast:month9          -1.303e-01  2.950e-02  -4.417 1.05e-05 ***
regionPlains:month9             -5.813e-02  3.243e-02  -1.793 0.073200 .  
regionSouthCentral:month9       -3.441e-02  2.919e-02  -1.179 0.238541    
regionSoutheast:month9          -5.982e-02  2.978e-02  -2.008 0.044744 *  
regionWest:month9               -5.648e-03  2.804e-02  -0.201 0.840359    
regionGreatLakes:month10        -8.735e-02  2.856e-02  -3.058 0.002258 ** 
regionMidsouth:month10          -1.234e-01  2.785e-02  -4.431 9.92e-06 ***
regionNortheast:month10         -2.329e-01  2.773e-02  -8.399  < 2e-16 ***
regionPlains:month10            -1.379e-01  2.929e-02  -4.709 2.67e-06 ***
regionSouthCentral:month10      -1.532e-02  2.776e-02  -0.552 0.580981    
regionSoutheast:month10         -1.142e-01  2.859e-02  -3.995 6.73e-05 ***
regionWest:month10              -3.187e-02  2.619e-02  -1.217 0.223842    
regionGreatLakes:month11        -7.780e-02  2.847e-02  -2.733 0.006334 ** 
regionMidsouth:month11          -8.089e-02  2.813e-02  -2.876 0.004074 ** 
regionNortheast:month11         -1.287e-01  2.832e-02  -4.544 5.87e-06 ***
regionPlains:month11            -8.710e-02  3.119e-02  -2.793 0.005277 ** 
regionSouthCentral:month11      -1.093e-02  2.803e-02  -0.390 0.696563    
regionSoutheast:month11         -1.068e-01  2.867e-02  -3.725 0.000201 ***
regionWest:month11              -2.611e-04  2.704e-02  -0.010 0.992299    
regionGreatLakes:month12        -1.416e-02  2.916e-02  -0.486 0.627347    
regionMidsouth:month12          -1.062e-02  2.975e-02  -0.357 0.721173    
regionNortheast:month12         -2.092e-03  2.859e-02  -0.073 0.941684    
regionPlains:month12            -7.905e-03  3.088e-02  -0.256 0.798004    
regionSouthCentral:month12       4.044e-02  2.852e-02   1.418 0.156439    
regionSoutheast:month12         -5.684e-02  2.903e-02  -1.958 0.050410 .  
regionWest:month12               4.433e-02  2.700e-02   1.642 0.100839    
total_volume:x4046               5.491e-15  4.480e-15   1.226 0.220527    
total_volume:x4225               2.581e-14  5.199e-15   4.965 7.47e-07 ***
total_volume:small_bags         -3.073e-14  1.337e-14  -2.299 0.021637 *  
x4046:small_bags                 3.539e-14  2.394e-14   1.478 0.139504    
x4225:small_bags                 2.093e-14  2.223e-14   0.941 0.346728    
x4770:small_bags                 1.491e-14  8.351e-14   0.179 0.858323    
total_volume:large_bags          2.089e-14  8.058e-15   2.593 0.009596 ** 
small_bags:large_bags            1.436e-13  3.743e-14   3.835 0.000130 ***
typeorganic:total_volume        -2.176e-06  7.548e-08 -28.824  < 2e-16 ***
typeorganic:x4225                2.534e-06  1.414e-07  17.923  < 2e-16 ***
year2016:total_volume           -8.392e-08  8.251e-09 -10.171  < 2e-16 ***
year2017:total_volume           -8.927e-08  8.699e-09 -10.263  < 2e-16 ***
year2018:total_volume           -1.135e-07  1.261e-08  -9.000  < 2e-16 ***
year2016:x4046                   7.846e-08  1.106e-08   7.096 1.80e-12 ***
year2017:x4046                   8.744e-08  1.235e-08   7.082 1.99e-12 ***
year2018:x4046                   1.099e-07  2.053e-08   5.355 9.59e-08 ***
year2016:x4770                   1.065e-07  6.667e-08   1.597 0.110473    
year2017:x4770                   1.203e-07  9.598e-08   1.254 0.210162    
year2018:x4770                   3.472e-08  1.470e-07   0.236 0.813225    
 [ reached getOption("max.print") -- omitted 57 rows ]
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0601 on 1916 degrees of freedom
Multiple R-squared:  0.9539,    Adjusted R-squared:  0.948 
F-statistic: 160.6 on 247 and 1916 DF,  p-value: < 2.2e-16
# Model-level fit summary for lm_multi.
glance(lm_multi)

# Root-mean-squared residual of lm_multi on the training data. The residuals
# are on the scale of the model's response.
train_rmse <- train %>%
  add_residuals(model = lm_multi, var = "resid") %>%
  summarise(rmse = mean(resid^2)^0.5) %>%
  pull(rmse)
Warning: prediction from a rank-deficient fit may be misleading
# Show the training RMSE computed above.
train_rmse
[1] 0.05013213
# Score the `test` data with lm_multi: `pred` holds the model predictions and
# `resid` the residuals. Only the observed price and those two columns are kept.
# NOTE(review): if lm_multi models log(average_price), `pred` and `resid` are on
# the log scale while `average_price` is not — confirm before comparing them.
predictions_test <- test %>%
  add_predictions(model = lm_multi, var = "pred") %>%
  add_residuals(model = lm_multi, var = "resid") %>%
  dplyr::select(average_price, pred, resid)
Warning: prediction from a rank-deficient fit may be misleadingWarning: prediction from a rank-deficient fit may be misleading
  
# Root-mean-squared residual of lm_multi on the test predictions.
# NOTE(review): `test_rsme` looks like a typo for `test_rmse`; the name is kept
# so that any other reference to it still resolves.
test_rsme <- predictions_test %>%
  summarise(rmse = mean(resid^2)^0.5) %>%
  pull(rmse)

# Test/train RMSE ratio: a value well above 1 means the model fits the training
# data noticeably better than unseen data.
test_rsme / train_rmse
[1] 1.265112
# Overfit? Test RMSE is ~27% higher than train RMSE (ratio 1.265) despite a good BIC.
# Refit lm_multi on the training data, modelling log(average_price) with main
# effects for region, type, year, month, total_volume and large_bags plus the
# year:type, month:year, region:total_volume and type:total_volume interactions.
lm_multi <- lm(
  formula = log(average_price) ~ 1 + region + type + year + month +
    total_volume + large_bags + year:type + month:year +
    region:total_volume + type:total_volume,
  data = train
)

summary(lm_multi)

Call:
lm(formula = log(average_price) ~ 1 + region + type + year + 
    month + total_volume + large_bags + year:type + month:year + 
    region:total_volume + type:total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.34697 -0.04625  0.00192  0.04766  0.53098 

Coefficients: (9 not defined because of singularities)
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      4.663e-01  1.828e-02  25.506  < 2e-16 ***
regionGreatLakes                -5.460e-02  1.121e-02  -4.871 1.19e-06 ***
regionMidsouth                  -5.180e-02  1.038e-02  -4.993 6.45e-07 ***
regionNortheast                  1.023e-01  9.396e-03  10.884  < 2e-16 ***
regionPlains                    -4.999e-02  1.141e-02  -4.381 1.24e-05 ***
regionSouthCentral              -2.804e-01  9.669e-03 -28.998  < 2e-16 ***
regionSoutheast                 -1.015e-01  1.085e-02  -9.358  < 2e-16 ***
regionWest                       1.086e-01  1.271e-02   8.546  < 2e-16 ***
typeorganic                      1.814e-01  1.465e-02  12.383  < 2e-16 ***
year2016                         4.252e-02  1.509e-02   2.819 0.004869 ** 
year2017                         1.042e-01  1.552e-02   6.716 2.40e-11 ***
year2018                         2.433e-01  1.705e-02  14.265  < 2e-16 ***
month2                           8.632e-03  1.556e-02   0.555 0.579028    
month3                           1.649e-02  1.472e-02   1.120 0.262703    
month4                           6.147e-02  1.530e-02   4.017 6.10e-05 ***
month5                           5.158e-02  1.497e-02   3.446 0.000580 ***
month6                           7.818e-02  1.534e-02   5.095 3.79e-07 ***
month7                           8.055e-02  1.565e-02   5.148 2.88e-07 ***
month8                           8.920e-02  1.428e-02   6.246 5.07e-10 ***
month9                           6.977e-02  1.536e-02   4.543 5.86e-06 ***
month10                          3.473e-02  1.554e-02   2.235 0.025536 *  
month11                         -2.938e-02  1.468e-02  -2.001 0.045483 *  
month12                         -3.794e-02  1.494e-02  -2.538 0.011206 *  
total_volume                    -8.671e-08  2.850e-09 -30.428  < 2e-16 ***
large_bags                      -1.888e-01  1.366e-02 -13.821  < 2e-16 ***
typeorganic:year2016            -8.678e-02  8.663e-03 -10.018  < 2e-16 ***
typeorganic:year2017            -1.263e-01  9.171e-03 -13.767  < 2e-16 ***
typeorganic:year2018            -1.221e-01  1.564e-02  -7.810 8.92e-15 ***
year2016:month2                 -1.740e-02  2.082e-02  -0.836 0.403495    
year2017:month2                 -3.230e-02  2.114e-02  -1.528 0.126740    
year2018:month2                 -2.678e-02  2.166e-02  -1.236 0.216488    
year2016:month3                 -1.764e-02  2.023e-02  -0.872 0.383462    
year2017:month3                  5.821e-02  2.060e-02   2.825 0.004766 ** 
year2018:month3                 -3.319e-02  2.096e-02  -1.583 0.113533    
year2016:month4                 -6.828e-02  2.077e-02  -3.288 0.001025 ** 
year2017:month4                  1.033e-01  2.047e-02   5.046 4.89e-07 ***
year2018:month4                         NA         NA      NA       NA    
year2016:month5                 -3.712e-02  2.012e-02  -1.845 0.065210 .  
year2017:month5                  1.571e-01  2.091e-02   7.512 8.57e-14 ***
year2018:month5                         NA         NA      NA       NA    
year2016:month6                 -2.374e-02  2.067e-02  -1.149 0.250864    
year2017:month6                  1.148e-01  2.102e-02   5.463 5.23e-08 ***
year2018:month6                         NA         NA      NA       NA    
year2016:month7                  4.701e-02  2.073e-02   2.268 0.023445 *  
year2017:month7                  1.328e-01  2.112e-02   6.286 3.95e-10 ***
year2018:month7                         NA         NA      NA       NA    
year2016:month8                  2.643e-02  2.014e-02   1.312 0.189538    
year2017:month8                  1.663e-01  2.044e-02   8.135 6.95e-16 ***
year2018:month8                         NA         NA      NA       NA    
year2016:month9                  6.379e-02  2.077e-02   3.071 0.002159 ** 
year2017:month9                  2.513e-01  2.147e-02  11.706  < 2e-16 ***
year2018:month9                         NA         NA      NA       NA    
year2016:month10                 1.389e-01  2.053e-02   6.766 1.72e-11 ***
year2017:month10                 2.413e-01  2.104e-02  11.464  < 2e-16 ***
year2018:month10                        NA         NA      NA       NA    
year2016:month11                 1.972e-01  2.048e-02   9.626  < 2e-16 ***
year2017:month11                 1.663e-01  2.074e-02   8.017 1.77e-15 ***
year2018:month11                        NA         NA      NA       NA    
year2016:month12                 3.177e-02  2.053e-02   1.547 0.121906    
year2017:month12                 1.024e-01  2.050e-02   4.998 6.29e-07 ***
year2018:month12                        NA         NA      NA       NA    
regionGreatLakes:total_volume   -2.009e-08  3.755e-09  -5.350 9.78e-08 ***
regionMidsouth:total_volume     -3.521e-08  4.501e-09  -7.822 8.14e-15 ***
regionNortheast:total_volume    -9.684e-09  2.902e-09  -3.337 0.000861 ***
regionPlains:total_volume       -1.384e-07  7.841e-09 -17.646  < 2e-16 ***
regionSouthCentral:total_volume  1.061e-08  2.251e-09   4.711 2.62e-06 ***
regionSoutheast:total_volume     1.130e-09  3.422e-09   0.330 0.741157    
regionWest:total_volume         -2.190e-08  2.301e-09  -9.519  < 2e-16 ***
typeorganic:total_volume        -1.213e-06  4.779e-08 -25.372  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07631 on 2104 degrees of freedom
Multiple R-squared:  0.9171,    Adjusted R-squared:  0.9148 
F-statistic: 394.6 on 59 and 2104 DF,  p-value: < 2.2e-16
# Fit statistics and the lm diagnostic plots for the refitted lm_multi.
glance(lm_multi)
plot(lm_multi)


# lm12: log(average_price) on type, region, month, year, x4046, total_volume
# and large_bags, with type:total_volume, type:region, type:year and
# region:total_volume interactions (no month:year term).
lm12 <- lm(
  formula = log(average_price) ~ type + region + month + year + x4046 +
    total_volume + large_bags + type:total_volume + type:region +
    type:year + region:total_volume,
  data = train
)
summary(lm12)

Call:
lm(formula = log(average_price) ~ type + region + month + year + 
    x4046 + total_volume + large_bags + type:total_volume + type:region + 
    type:year + region:total_volume, data = train)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.29711 -0.05424 -0.00103  0.05371  0.58560 

Coefficients:
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      5.040e-01  4.200e-02  12.000  < 2e-16 ***
typeorganic                      1.132e-01  4.174e-02   2.712 0.006737 ** 
regionGreatLakes                 4.623e-02  5.474e-02   0.845 0.398458    
regionMidsouth                  -6.778e-02  5.790e-02  -1.171 0.241853    
regionNortheast                 -9.002e-02  5.419e-02  -1.661 0.096845 .  
regionPlains                    -1.949e-02  5.207e-02  -0.374 0.708235    
regionSouthCentral              -1.663e-01  6.121e-02  -2.717 0.006641 ** 
regionSoutheast                 -6.441e-02  5.020e-02  -1.283 0.199627    
regionWest                      -1.099e-01  5.461e-02  -2.013 0.044280 *  
month2                          -5.992e-03  8.212e-03  -0.730 0.465728    
month3                           2.174e-02  8.023e-03   2.710 0.006777 ** 
month4                           7.506e-02  8.803e-03   8.527  < 2e-16 ***
month5                           9.300e-02  8.875e-03  10.479  < 2e-16 ***
month6                           1.089e-01  8.942e-03  12.181  < 2e-16 ***
month7                           1.411e-01  8.794e-03  16.051  < 2e-16 ***
month8                           1.452e-01  8.775e-03  16.547  < 2e-16 ***
month9                           1.671e-01  9.052e-03  18.459  < 2e-16 ***
month10                          1.585e-01  8.883e-03  17.847  < 2e-16 ***
month11                          7.760e-02  9.009e-03   8.614  < 2e-16 ***
month12                          2.495e-03  8.786e-03   0.284 0.776443    
year2016                         9.240e-02  7.159e-03  12.907  < 2e-16 ***
year2017                         2.423e-01  6.958e-03  34.822  < 2e-16 ***
year2018                         3.110e-01  1.221e-02  25.475  < 2e-16 ***
x4046                            4.992e-08  9.840e-09   5.073 4.26e-07 ***
total_volume                    -1.229e-07  7.607e-09 -16.158  < 2e-16 ***
large_bags                      -2.163e-01  1.476e-02 -14.657  < 2e-16 ***
typeorganic:total_volume        -1.338e-06  5.432e-08 -24.634  < 2e-16 ***
typeorganic:regionGreatLakes    -9.084e-02  5.398e-02  -1.683 0.092572 .  
typeorganic:regionMidsouth       1.609e-02  5.713e-02   0.282 0.778277    
typeorganic:regionNortheast      1.904e-01  5.361e-02   3.551 0.000393 ***
typeorganic:regionPlains        -3.563e-02  5.227e-02  -0.682 0.495506    
typeorganic:regionSouthCentral  -1.245e-01  6.087e-02  -2.046 0.040916 *  
typeorganic:regionSoutheast     -4.898e-02  5.038e-02  -0.972 0.330996    
typeorganic:regionWest           2.467e-01  5.423e-02   4.549 5.70e-06 ***
typeorganic:year2016            -9.931e-02  9.931e-03 -10.000  < 2e-16 ***
typeorganic:year2017            -1.291e-01  1.028e-02 -12.552  < 2e-16 ***
typeorganic:year2018            -1.448e-01  1.733e-02  -8.356  < 2e-16 ***
regionGreatLakes:total_volume   -4.966e-08  1.280e-08  -3.880 0.000108 ***
regionMidsouth:total_volume     -3.889e-08  1.574e-08  -2.471 0.013540 *  
regionNortheast:total_volume     4.550e-08  1.119e-08   4.066 4.95e-05 ***
regionPlains:total_volume       -1.954e-07  1.911e-08 -10.229  < 2e-16 ***
regionSouthCentral:total_volume -1.441e-08  1.029e-08  -1.401 0.161390    
regionSoutheast:total_volume    -2.368e-08  1.060e-08  -2.233 0.025684 *  
regionWest:total_volume          1.558e-08  8.890e-09   1.753 0.079786 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.08421 on 2120 degrees of freedom
Multiple R-squared:  0.8983,    Adjusted R-squared:  0.8962 
F-statistic: 435.4 on 43 and 2120 DF,  p-value: < 2.2e-16
# Fit statistics and the lm diagnostic plots for lm12.
glance(lm12)
plot(lm12)

# Pairs plot of average_price against the factor columns of avocados_1.
# Predicates are wrapped in where(): passing a bare predicate function to
# select() is deprecated in tidyselect and emits a warning.
avocados_1 %>%
  dplyr::select(average_price, where(is.factor)) %>%
  ggpairs()


# month, type and region all quite strong

# Pairs plot of the numeric columns of avocados_1.
avocados_1 %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

# lm4: untransformed average_price on type, region and month (main effects
# only), fitted on the training data.
lm4 <- lm(formula = average_price ~ type + region + month, data = train)

summary(lm4)
plot(lm4)
# Residuals of lm4 on the training data, with the response and lm4's own
# predictors removed so the remaining columns can be screened against what the
# model has not yet explained.
# This previously used add_residuals(lm5). lm4 is the model fitted just above,
# and the columns dropped here (type, region, month) are exactly lm4's terms.
avocados_resid <- train %>%
  add_residuals(lm4) %>%
  dplyr::select(-average_price, -type, -region, -month) %>%
  dplyr::select(resid, everything())


# Pairs plot of the model residuals against the remaining factor columns.
# Predicates are wrapped in where(): passing a bare predicate function to
# select() is deprecated in tidyselect and emits a warning.
avocados_resid %>%
  dplyr::select(resid, where(is.factor)) %>%
  ggpairs()


# month, type and region all quite strong
# NOTE(review): this remark appears to be carried over from an earlier step.
# type, region and month are dropped when avocados_resid is built, so re-check
# it against the plot above.

# Pairs plot of the residuals against the remaining numeric columns.
avocados_resid %>%
  dplyr::select(where(is.numeric)) %>%
  ggpairs()

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KGphbml0b3IpDQpsaWJyYXJ5KGNhcmV0KQ0KbGlicmFyeShicm9vbSkNCmxpYnJhcnkoZmFzdER1bW1pZXMpDQpsaWJyYXJ5KEdHYWxseSkNCmxpYnJhcnkoZ2dmb3J0aWZ5KQ0KbGlicmFyeShtb3NhaWMpDQpsaWJyYXJ5KG1vc2FpY0RhdGEpDQpsaWJyYXJ5KG1vZGVscikNCmxpYnJhcnkocmVsYWltcG8pDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoZ2xtdWx0aSkNCmBgYA0KDQpgYGB7cn0NCmF2b2NhZG9zIDwtIHJlYWRfY3N2KGhlcmU6OmhlcmUoIndlZWtlbmQvZGF0YS9hdm9jYWRvLmNzdiIpKSAlPiUgY2xlYW5fbmFtZXMoKQ0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3MgJT4lDQogIGRpc3RpbmN0KHJlZ2lvbikNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIocmVnaW9uID09ICJUb3RhbFVTIiwNCiAgICAgICAgIGRhdGUgPT0gIjIwMTUtMTItMjciKQ0KDQphdm9jYWRvcyAlPiUgDQogIGZpbHRlcihyZWdpb24gJWluJSBjKCJNaWRzb3V0aCIsICJOb3J0aGVhc3QiLCAiUGxhaW5zIiwgIlNvdXRoQ2VudHJhbCIsICJTb3V0aGVhc3QiLCAiV2VzdCIsICJHcmVhdExha2VzIiwgIkNhbGlmb3JuaWEiKSwNCiAgICAgICAgIGRhdGUgPT0gIjIwMTUtMTItMjciKSAlPiUgDQogIGdyb3VwX2J5KHR5cGUpICU+JSANCiAgc3VtbWFyaXNlKHRvdGFsX3ZvbHVtZSA9IHN1bSh0b3RhbF92b2x1bWUpLA0KICAgICAgICAgICAgeDQwNDYgPSBzdW0oeDQwNDYpLA0KICAgICAgICAgICAgdG90YWxfYmFncyA9IHN1bSh0b3RhbF9iYWdzKSkNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIoIXJlZ2lvbiAlaW4lIGMoIlRvdGFsVVMiLCAiTWlkc291dGgiLCAiTm9ydGhlYXN0IiwgIlBsYWlucyIsICJTb3V0aENlbnRyYWwiLCAiU291dGhlYXN0IiwgIldlc3QiLCAiR3JlYXRMYWtlcyIsICJDYWxpZm9ybmlhIiksDQogICAgICAgICBkYXRlID09ICIyMDE1LTEyLTI3IikgJT4lIA0KICBncm91cF9ieSh0eXBlKSAlPiUgDQogIHN1bW1hcmlzZSh0b3RhbF92b2x1bWUgPSBzdW0odG90YWxfdm9sdW1lKSwNCiAgICAgICAgICAgIHg0MDQ2ID0gc3VtKHg0MDQ2KSwNCiAgICAgICAgICAgIHRvdGFsX2JhZ3MgPSBzdW0odG90YWxfYmFncykpDQoNCiMgTWlzc291dGgsIE5vcnRoZWFzdCwgUGxhaW5zLCBTb3V0aENlbnRyYWwsIFNvdXRoZWFzdCwgV2VzdCwgR3JlYXRMYWtlcywgQ2FsaWZvcm5pYQ0KIyBUaGVzZSBhcmUgdGhlIHJlZ2lvbnMgdGhhdCBtYWtlIHVwIHRoZSB0b3RhbCBVUy4gQWJvdmUgY29kZSBzaG93cyB0aGF0IHRoZSB0b3RhbCBudW1iZXJzIG9mIHRoZSBjYXRlZ29yaWVzIGFyZSB0aGUgc2FtZQ0KDQojIEkgd2lsbCBmaWx0ZXIgdGhlIGRhdGEgc28gdGhhdCBpdCBvbmx5IHVzZXMgdGhlIGxpbmVzIGZvciB0aGUgYWJvdmUgcmVnaW9u
cyB0byBhdm9pZCBkb3VibGUgY291bnRpbmcuDQojIG5vIG5lZWQgdG8gd29ycnkgYWJvdXQgdGhlIHNtYWxsZXIgYXJlYXMgYXMgdGhlc2UgYXJlIGNvbnRhaW5lZCB3aXRoaW4gdGhlIGJpZ2dlciByZWdpb25zDQojIGFsc28gaSBkb250IHRoaW5rIGl0IG1ha2VzIHNlbnNlIHRvIGxvb2sgYXQgcmVnaW9ucyB0aGF0IHNtYWxsDQojIGFsc28gYnkgdGhlIGxvb2tzIG9mIGl0IHRoZSByZW1haW5pbmcgY2l0aWVzIGFyZW50IGFuIGV4aGF1c3RpdmUgbGlzdCANCg0KIyBtaWdodCBiZSBnb29kIHRvIGV4dHJhY3QgbW9udGggZnJvbSBkYXRlIGFzIGEgY2F0ZWdvcmljYWwNCiMgcHJvcG9ydGlvbnMgb2YgNDA0NiwgNDIyNSwgNDc3MCBtYXkgYmUgYmV0dGVyIHRoYW4gYWJzb2x1dGVzLCBhbHNvIHByb3BzIG9mIGJhZyBzaXplIG1heSBiZSBiZXR0ZXIgdGhhbiBhYnNvbHV0ZSBudW1iZXJzIA0KYGBgDQpgYGB7cn0NCmF2b2NhZG9zICU+JSANCiAgZmlsdGVyKHJlZ2lvbiAlaW4lIGMoIk1pZHNvdXRoIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJOb3J0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIlBsYWlucyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiU291dGhDZW50cmFsIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJTb3V0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIldlc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkdyZWF0TGFrZXMiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkNhbGlmb3JuaWEiKSkgJT4lIA0KICBtdXRhdGUoYSA9IHJvdW5kKHNtYWxsX2JhZ3MgKyBsYXJnZV9iYWdzICsgeF9sYXJnZV9iYWdzLCAyKSAtIHJvdW5kKHRvdGFsX2JhZ3MsIDIpKSAlPiUgDQogIGFycmFuZ2UoZGVzYyhhKSkNCg0KYXZvY2Fkb3MNCg0KDQoNCmFsaWFzKGxtKGF2ZXJhZ2VfcHJpY2UgfiAuLCBkYXRhID0gYXZvY2Fkb3MpKQ0KDQojIGZvciBzb21lIHJlYXNvbiB0aGUgYmFnIHNpemVzIGRvbnQgcHJvcGVybHkgYWRkIHVwIHRvIHRoZSB0b3RhbF9iYWdzDQojIHdpdGhvdXQgdGhlIHJvdW5kIHRoZXkgYXJlIGRpZmZlcmVudCBieSBsaWtlIDAuMDAwMDAwMDAwMDAxDQojIHdpdGggdGhlIHJvdW5kIHRoZXJlIGFyZSBhIGNvdXBsZSBhdCBlaXRoZXIgZW5kIG91dCBieSArLTEgDQojIHRoaXMgbWVhbnMgdGhleSBkb250IHNob3cgdXAgaW4gYWxpYXMgLSBJJ20gZ29pbmcgdG8gbWFudWFsIHJlbW92ZSB4IGxhcmdlcyBiYWdzIGFzIHRoaXMgY2FuIGJlIGRlcml2ZWQgZnJvbSB0aGUgb3RoZXIgMw0KIyB4NDA0NiwgeDQyMjUgYW5kIHggNDc3MCBkb250IGFkZCB1cCB0byB0aGUgdG90YWwgdm9sdW1lIHNvIGNhbiBrZWVwIHRoZW0gYWxsDQpgYGANCg0KYGBge3J9DQphdm9jYWRvcyA8LSBhdm9jYWRvcyAlPiUgZmlsdGVyKHJlZ2lvbiAlaW4lIGMoIk1pZHNvdXRoIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJOb3J0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAg
IlBsYWlucyIsIA0KICAgICAgICAgICAgICAgICAgICAgICAiU291dGhDZW50cmFsIiwgDQogICAgICAgICAgICAgICAgICAgICAgICJTb3V0aGVhc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIldlc3QiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkdyZWF0TGFrZXMiLCANCiAgICAgICAgICAgICAgICAgICAgICAgIkNhbGlmb3JuaWEiKSkgJT4lIA0KICBtdXRhdGUobW9udGggPSBhcy5mYWN0b3IobW9udGgoZGF0ZSkpLA0KICAgICAgICAgYWNyb3NzKHdoZXJlKGlzLmNoYXJhY3RlciksIGFzLmZhY3RvciksDQogICAgICAgICB5ZWFyID0gYXMuZmFjdG9yKHllYXIpKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLXgxLCAtZGF0ZSwgLXhfbGFyZ2VfYmFncywgLXRvdGFsX2JhZ3MpIA0KICANCmBgYA0KDQoNCg0KYGBge3J9DQpuX2RhdGEgPC0gbnJvdyhhdm9jYWRvcykNCg0KdGVzdF9pbmRleCA8LSBzYW1wbGUoMTpuX2RhdGEsIHNpemUgPSBuX2RhdGEgKiAwLjIpDQoNCnRlc3QgPC0gYXZvY2Fkb3MgJT4lIA0KICBzbGljZSh0ZXN0X2luZGV4KQ0KDQp0cmFpbiA8LSBhdm9jYWRvcyAlPiUgDQogIHNsaWNlKC10ZXN0X2luZGV4KQ0KYGBgDQoNCg0KYGBge3J9DQpsbTEgPC0gbG0oYXZlcmFnZV9wcmljZSB+IHRvdGFsX3ZvbHVtZSwgdHJhaW4pDQoNCnN1bW1hcnkobG0xKQ0KcGxvdE1vZGVsKGxtMSkNCnBsb3QobG0xKQ0KDQojIGRpYWdub3N0b2NzIGFyZSBtYWQgaSB0aGluayB0aGlzIGlzIGJlY2F1c2Ugb2YgdGhlIG9yZ2FuaWMgdHlwZQ0KDQoNCg0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfcmVzaWQgPC0gdHJhaW4gJT4lIA0KICBhZGRfcmVzaWR1YWxzKGxtMSkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KC1hdmVyYWdlX3ByaWNlLCAtdG90YWxfdm9sdW1lKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGV2ZXJ5dGhpbmcoKSkgDQoNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBpcy5mYWN0b3IpICU+JSANCiAgZ2dwYWlycygpIA0KDQojIG1vbnRoLCB0eXBlIGFuZCByZWdpb24gYWxsIHF1aXRlIHN0cm9uZw0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCg0KYGBgDQoNCmBgYHtyfQ0KdHJhaW4gJT4lIA0KICBnZ3Bsb3QoYWVzKHggPSB4NDA0NiwgeSA9IGF2ZXJhZ2VfcHJpY2UsIGNvbG91ciA9IHR5cGUpKSArDQogIGdlb21fcG9pbnQoKSArDQogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIHNlID0gRkFMU0UpDQpgYGANCg0KYGBge3J9DQojIGZpbHRlciBieSB0eXBlIHRvIGNoZWNrIGlmIHRoZSBhcmUgY29ycmVsYXRlZCB3aXRoIGRpZmZlcmVudCB0aGluZ3MNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIodHlwZSA9PSAiY29udmVudGlvbmFsIikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGF2ZXJhZ2VfcHJpY2UsIGlzLmZhY3RvcikgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCiMgbW9udGgs
IHR5cGUgYW5kIHJlZ2lvbiBhbGwgcXVpdGUgc3Ryb25nDQoNCmF2b2NhZG9zICU+JSANCiAgZmlsdGVyKHR5cGUgPT0gImNvbnZlbnRpb25hbCIpICU+JQ0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KDQoNCg0KDQoNCmF2b2NhZG9zICU+JSANCiAgZmlsdGVyKHR5cGUgPT0gIm9yZ2FuaWMiKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoYXZlcmFnZV9wcmljZSwgaXMuZmFjdG9yKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBzdHJvbmcNCg0KYXZvY2Fkb3MgJT4lIA0KICBmaWx0ZXIodHlwZSA9PSAib3JnYW5pYyIpICU+JQ0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KDQojIHRvdGFsIHZvbHVtZSBhbmQgdG90YWwgYmFncyBzdHJvbmdlc3QNCg0KDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBnb2luZyB0byBkbyB0eXBlIGZpcnN0DQoNCmxtMiA8LSBsbShhdmVyYWdlX3ByaWNlIH4gdHlwZSwgdHJhaW4pDQoNCnN1bW1hcnkobG0yKQ0KcGxvdChsbTIpDQoNCg0KYGBgDQoNCg0KYGBge3J9DQphdm9jYWRvc19yZXNpZCA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG0yKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWF2ZXJhZ2VfcHJpY2UsIC10eXBlKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGV2ZXJ5dGhpbmcoKSkgDQoNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBpcy5mYWN0b3IpICU+JSANCiAgZ2dwYWlycygpIA0KDQojIG1vbnRoLCB0eXBlIGFuZCByZWdpb24gYWxsIHF1aXRlIHN0cm9uZw0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQpgYGANCg0KDQoNCmBgYHtyfQ0KbG0zIDwtIGxtKGF2ZXJhZ2VfcHJpY2UgfiB0eXBlICsgcmVnaW9uLCB0cmFpbikNCg0Kc3VtbWFyeShsbTMpDQpwbG90KGxtMykNCg0KYW5vdmEobG0yLCBsbTMpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfcmVzaWQgPC0gdHJhaW4gJT4lIA0KICBhZGRfcmVzaWR1YWxzKGxtMykgJT4lIA0KICBkcGx5cjo6c2VsZWN0KC1hdmVyYWdlX3ByaWNlLCAtdHlwZSwgLXJlZ2lvbikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpIA0KDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgaXMuZmFjdG9yKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBzdHJvbmcNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCg0KYGBge3J9DQpsbTQgPC0gbG0oYXZlcmFnZV9wcmljZSB+IHR5cGUgKyByZWdpb24gKyBtb250aCwgdHJhaW4pDQoNCnN1bW1hcnkobG00KQ0KcGxvdChsbTQpDQoN
CmFub3ZhKGxtMywgbG00KQ0KDQojIGxvZyBoZWxwcyBkaWFnbm9zdGljcw0KDQpsbTQgPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoLCB0cmFpbikNCg0Kc3VtbWFyeShsbTQpDQpwbG90KGxtNCkNCg0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfcmVzaWQgPC0gdHJhaW4gJT4lIA0KICBhZGRfcmVzaWR1YWxzKGxtNCkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KC1hdmVyYWdlX3ByaWNlLCAtdHlwZSwgLXJlZ2lvbiwgLW1vbnRoKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGV2ZXJ5dGhpbmcoKSkgDQoNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBpcy5mYWN0b3IpICU+JSANCiAgZ2dwYWlycygpIA0KDQojIG1vbnRoLCB0eXBlIGFuZCByZWdpb24gYWxsIHF1aXRlIHN0cm9uZw0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQpgYGANCg0KDQoNCmBgYHtyfQ0KDQpsbTUgPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoICsgeWVhciwgdHJhaW4pDQoNCnN1bW1hcnkobG01KQ0KcGxvdChsbTUpDQoNCmFub3ZhKGxtNCwgbG01KQ0KYGBgDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfcmVzaWQgPC0gdHJhaW4gJT4lIA0KICBhZGRfcmVzaWR1YWxzKGxtNSkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KC1hdmVyYWdlX3ByaWNlLCAtdHlwZSwgLXJlZ2lvbiwgLW1vbnRoLCAteWVhcikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpIA0KDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgaXMuZmFjdG9yKSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBzdHJvbmcNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCmBgYHtyfQ0KbG02IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIgKyB4NDA0NiwgdHJhaW4pDQoNCnN1bW1hcnkobG02KQ0KcGxvdChsbTYpDQoNCmBgYA0KDQpgYGB7cn0NCmF2b2NhZG9zX3Jlc2lkIDwtIHRyYWluICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbTYpICU+JSANCiAgZHBseXI6OnNlbGVjdCgtYXZlcmFnZV9wcmljZSwgLXR5cGUsIC1yZWdpb24sIC1tb250aCwgLXllYXIsIC14NDA0NikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KHJlc2lkLCBldmVyeXRoaW5nKCkpIA0KDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCmBgYA0KDQoNCmBgYHtyfQ0KDQoNCg0KbG03IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIgKyB4NDA0NiArIHRvdGFsX3Zv
bHVtZSwgdHJhaW4pDQoNCnN1bW1hcnkobG03KQ0KcGxvdChsbTcpDQpgYGANCg0KYGBge3J9DQphdm9jYWRvc19yZXNpZCA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG03KSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWF2ZXJhZ2VfcHJpY2UsIC10eXBlLCAtcmVnaW9uLCAtbW9udGgsIC15ZWFyLCAteDQwNDYsIC10b3RhbF92b2x1bWUpICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSANCg0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQpgYGANCg0KYGBge3J9DQpsbTggPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoICsgeWVhciArIHg0MDQ2ICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncywgdHJhaW4pDQpsbThiIDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKX4xK3R5cGUreWVhcityZWdpb24rbW9udGgrdG90YWxfdm9sdW1lK3NtYWxsX2JhZ3MrbGFyZ2VfYmFncywgdHJhaW4pDQoNCnN1bW1hcnkobG04KQ0KcGxvdChsbTgpDQpzdW1tYXJ5KGxtOGIpDQpwbG90KGxtOGIpDQpgYGANCg0KYGBge3J9DQphdm9jYWRvc19yZXNpZCA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG04KSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWF2ZXJhZ2VfcHJpY2UsIC10eXBlLCAtcmVnaW9uLCAtbW9udGgsIC15ZWFyLCAteDQwNDYsIC10b3RhbF92b2x1bWUsIC1sYXJnZV9iYWdzKSAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGV2ZXJ5dGhpbmcoKSkgDQoNCg0KYXZvY2Fkb3NfcmVzaWQgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGlzLm51bWVyaWMpICU+JSANCiAgZ2dwYWlycygpIA0KYGBgDQoNCmBgYHtyfQ0KbG05IDwtIGxtKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIgKyB4NDA0NiArIHRvdGFsX3ZvbHVtZSArIGxhcmdlX2JhZ3MsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtOSkNCnBsb3QobG05KQ0KYGBgDQoNCmBgYHtyfQ0KbG0xMCA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtMTApDQpwbG90KGxtMTApDQpgYGANCg0KDQoNCmBgYHtyfQ0KbG0xMSA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUgKyB0eXBlOnJlZ2lvbiwgdHJhaW4pDQoNCmxtMTIgPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoICsgeWVhciArIHg0MDQ2ICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHR5cGU6dG90YWxfdm9sdW1lICsgdHlwZTpyZWdpb24gKyB0eXBlOnllYXIg
KyByZWdpb246dG90YWxfdm9sdW1lLCB0cmFpbikNCg0Kc3VtbWFyeShsbTExKQ0KcGxvdChsbTExKQ0KZ2xhbmNlKGxtMTEpDQpnbGFuY2UobG0xMikNCmBgYA0KDQoNCmBgYHtyfQ0KdHJhaW5fcm1zZSA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG0xMikgJT4lIA0KICBtdXRhdGUoc3FfcmVzaWQgPSByZXNpZF4yKSAlPiUgDQogIHN1bW1hcmlzZShtc2UgPSBtZWFuKHNxX3Jlc2lkKSwNCiAgICAgICAgICAgIHJtc2UgPSBtc2VeMC41KSAlPiUgDQogIHB1bGwocm1zZSkNCg0KDQp0cmFpbl9ybXNlDQoNCnByZWRpY3Rpb25zX3Rlc3QgPC0gdGVzdCAlPiUgDQogIGFkZF9wcmVkaWN0aW9ucyhsbTEyKSAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG0xMikgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGF2ZXJhZ2VfcHJpY2UsIHByZWQsIHJlc2lkKSANCiAgDQp0ZXN0X3JzbWUgPC0gcHJlZGljdGlvbnNfdGVzdCAlPiUgDQogIG11dGF0ZShzcV9yZXNpZCA9IHJlc2lkXjIpICU+JSANCiAgc3VtbWFyaXNlKG1zZSA9IG1lYW4oc3FfcmVzaWQpLA0KICAgICAgICAgICAgcm1zZSA9IG1zZV4wLjUpICU+JSANCiAgcHVsbChybXNlKQ0KDQp0ZXN0X3JzbWUgLyB0cmFpbl9ybXNlDQpgYGANCg0KDQpgYGB7cn0NCmN2XzEwX2ZvbGQgPC0gdHJhaW5Db250cm9sKA0KICBtZXRob2QgPSAiY3YiLA0KICBudW1iZXIgPSAxMCwNCiAgc2F2ZVByZWRpY3Rpb25zID0gVFJVRQ0KKQ0KDQptb2RlbDIgPC0gdHJhaW4obG9nKGF2ZXJhZ2VfcHJpY2UpIH4gdHlwZSArIHJlZ2lvbiArIG1vbnRoICsgeWVhciArIHg0MDQ2ICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHR5cGU6dG90YWxfdm9sdW1lLCANCiAgICAgICAgICAgICAgIGF2b2NhZG9zLCANCiAgICAgICAgICAgICAgIHRyQ29udHJvbCA9IGN2XzEwX2ZvbGQsIA0KICAgICAgICAgICAgICAgbWV0aG9kID0gImxtIikNCg0KIG1vZGVsMiRyZXNhbXBsZSAlPiUgDQogIHN1bW1hcmlzZShhdl9yMiA9IG1lYW4oUnNxdWFyZWQpLA0KICAgICAgICAgICAgYXZfcm1zZSA9IG1lYW4oUk1TRSkpDQpgYGANCg0KYGBge3J9DQpjdl8xMF9mb2xkIDwtIHRyYWluQ29udHJvbCgNCiAgbWV0aG9kID0gImN2IiwNCiAgbnVtYmVyID0gMTAsDQogIHNhdmVQcmVkaWN0aW9ucyA9IFRSVUUNCikNCg0KbW9kZWwyIDwtIHRyYWluKGxvZyhhdmVyYWdlX3ByaWNlKSB+IHR5cGUgKyByZWdpb24gKyBtb250aCArIHllYXIgKyB4NDA0NisgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHR5cGU6dG90YWxfdm9sdW1lICsgdHlwZTpyZWdpb24gKyB0eXBlOnllYXIgKyByZWdpb246dG90YWxfdm9sdW1lLCANCiAgICAgICAgICAgICAgIGF2b2NhZG9zLCANCiAgICAgICAgICAgICAgIHRyQ29udHJvbCA9IGN2XzEwX2ZvbGQsIA0KICAgICAgICAgICAgICAgbWV0aG9kID0gImxtIikNCg0KIG1vZGVsMiRyZXNhbXBsZSAlPiUgDQogIHN1bW1hcmlzZShhdl9yMiA9IG1lYW4oUnNxdWFyZWQpLA0KICAgICAgICAgICAg
YXZfcm1zZSA9IG1lYW4oUk1TRSkpDQpgYGANCg0KYGBge3J9DQpjYWxjLnJlbGltcChsbTksIHR5cGUgPSAibG1nIiwgcmVsYSA9IFRSVUUpDQpgYGANCg0KDQoNCg0KQVVUT01BVEVEDQoNCmBgYHtyfQ0KDQojIyMjIEdBUkJBR0UNCg0KcmVnc3Vic2V0X2ZvcndhcmRzIDwtIHJlZ3N1YnNldHMoYXZlcmFnZV9wcmljZSB+IC4sIA0KICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gYXZvY2Fkb3MsDQogICAgICAgICAgICAgICAgICAgICAgICAgIG52bWF4ID0gNDAsDQogICAgICAgICAgICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJmb3J3YXJkIikNCg0KDQpzdW1tYXJ5KHJlZ3N1YnNldF9mb3J3YXJkcykNCg0KcGxvdChyZWdzdWJzZXRfZm9yd2FyZHMsDQogICAgIHNjYWxlID0gImJpYyIpDQoNCnN1bV9mb3J3YXJkIDwtIHN1bW1hcnkocmVnc3Vic2V0X2ZvcndhcmRzKQ0KDQpwbG90KHN1bV9mb3J3YXJkJGJpYywNCiAgICAgdHlwZSA9ICJiIikNCmBgYA0KDQpgYGB7cn0NCiMgZ2xtdWx0aQ0KDQoNCmdsbXVsdGlfZml0IDwtIGdsbXVsdGkoDQogIGxvZyhhdmVyYWdlX3ByaWNlKSB+IC4sICMgbW9kZWwgdG8gZml0LCBpbiB0aGlzIGNhc2UsIGNoYXJnZXMgdmFyaWVzIHdpdGggZXZlcnl0aGluZw0KICBsZXZlbCA9IDIsICMgbGV2ZWwgPSAyIG1lYW5zIHRyeSBwYWlyd2lzZSBpbnRlcmFjdGlvbnMuIGxldmVsID0gMSBtZWFucyBtYWluIGVmZmVjdHMgb25seQ0KICBkYXRhID0gdHJhaW4sICMgZGF0YSB0byB1c2UgZm9yIGZpdHRpbmcNCiAgbWluc2l6ZSA9IDAsICMgbWluIHNpemUgb2YgbW9kZWwgdG8gdHJ5LCBpbiBudW1iZXIgb2YgcHJlZGljdG9ycw0KICBtYXhzaXplID0gLTEsICMgbWF4IHNpemUgdG8gdHJ5LCBzZXQgdG8gLTEgZm9yIHVubGltaXRlZA0KICBtYXJnaW5hbGl0eSA9IFRSVUUsICMgbWFyZ2luYWxpdHkgdHJ1ZSBtZWFucyBpbmNsdWRlIHBhaXJ3aXNlIGludGVyYWN0aW9uIG9ubHkgaWYgYm90aCBtYWluIGVmZmVjdHMgcHJlc2VudCBpbiBtb2RlbC4gIA0KICBtZXRob2QgPSAiZCIsICMgbWV0aG9kICJkIiBtZWFucyB0cmlhbCBydW4sIHRvIGdldCBzaXplIG9mIHByb2JsZW0uIFNldCB0byAiaCIgZm9yIGV4aGF1c3RpdmUgc2VhcmNoLCBvciAiZyIgZm9yIGdlbmV0aWMgYWxnb3JpdGhtDQogIGNvbmZzZXRzaXplID0gMTAsICMgaG93IG1hbnkgbW9kZWxzIHNob3VsZCBnbG11bHRpKCkgcmV0dXJuPyBNdXN0IGJlIGxlc3MgdGhhbiB0b3RhbCBzaXplIG9mIHByb2JsZW0NCiAgcGxvdHR5ID0gRkFMU0UsICMgcHJvdmlkZSBwcm9ncmVzcyBwbG90cz8gR2VuZXJhbGx5IGFubm95aW5nLg0KICByZXBvcnQgPSBUUlVFLCAjIHByb3ZpZGUgcHJvZ3Jlc3MgcmVwb3J0cz8gR2VuZXJhbGx5IHVzZWZ1bC4NCiAgZml0ZnVuY3Rpb24gPSBsbSwgIyB1c2UgbG0oKSBhcyBmaXQgZnVuY3Rpb24uIENhbiBhbHNvIHVzZSBnbG0oKSBmb3IgbG9naXN0aWMgcmVncmVzc2lvbi4NCiAgY3JpdCA9IGFpYyAjIGNyaXRl
cmlvbiBmb3Igc2VsZWN0aW5nIGJlc3QgbW9kZWxzLiANCikNCmBgYA0KDQoNCmBgYHtyfQ0KbG1fbXVsdGkgPC0gbG0obG9nKGF2ZXJhZ2VfcHJpY2UpfjErdHlwZSt5ZWFyK3JlZ2lvbittb250aCt0b3RhbF92b2x1bWUreDQwNDYreDQyMjUreDQ3NzArc21hbGxfYmFncytsYXJnZV9iYWdzK3llYXI6dHlwZStyZWdpb246dHlwZStyZWdpb246eWVhcittb250aDp0eXBlK21vbnRoOnllYXIrbW9udGg6cmVnaW9uK3g0MDQ2OnRvdGFsX3ZvbHVtZSt4NDIyNTp0b3RhbF92b2x1bWUrc21hbGxfYmFnczp0b3RhbF92b2x1bWUrc21hbGxfYmFnczp4NDA0NitzbWFsbF9iYWdzOng0MjI1K3NtYWxsX2JhZ3M6eDQ3NzArbGFyZ2VfYmFnczp0b3RhbF92b2x1bWUrbGFyZ2VfYmFnczpzbWFsbF9iYWdzK3R5cGU6dG90YWxfdm9sdW1lK3R5cGU6eDQyMjUreWVhcjp0b3RhbF92b2x1bWUreWVhcjp4NDA0Nit5ZWFyOng0NzcwK3llYXI6bGFyZ2VfYmFncytyZWdpb246dG90YWxfdm9sdW1lK3JlZ2lvbjp4NDIyNStyZWdpb246bGFyZ2VfYmFncyttb250aDp0b3RhbF92b2x1bWUrbW9udGg6eDQ3NzArbW9udGg6c21hbGxfYmFncywgdHJhaW4pIA0KDQpzdW1tYXJ5KGxtX211bHRpKQ0KZ2xhbmNlKGxtX211bHRpKQ0KYGBgDQoNCmBgYHtyfQ0KdHJhaW5fcm1zZSA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG1fbXVsdGkpICU+JSANCiAgbXV0YXRlKHNxX3Jlc2lkID0gcmVzaWReMikgJT4lIA0KICBzdW1tYXJpc2UobXNlID0gbWVhbihzcV9yZXNpZCksDQogICAgICAgICAgICBybXNlID0gbXNlXjAuNSkgJT4lIA0KICBwdWxsKHJtc2UpDQoNCg0KdHJhaW5fcm1zZQ0KDQpwcmVkaWN0aW9uc190ZXN0IDwtIHRlc3QgJT4lIA0KICBhZGRfcHJlZGljdGlvbnMobG1fbXVsdGkpICU+JSANCiAgYWRkX3Jlc2lkdWFscyhsbV9tdWx0aSkgJT4lIA0KICBkcGx5cjo6c2VsZWN0KGF2ZXJhZ2VfcHJpY2UsIHByZWQsIHJlc2lkKSANCiAgDQp0ZXN0X3JzbWUgPC0gcHJlZGljdGlvbnNfdGVzdCAlPiUgDQogIG11dGF0ZShzcV9yZXNpZCA9IHJlc2lkXjIpICU+JSANCiAgc3VtbWFyaXNlKG1zZSA9IG1lYW4oc3FfcmVzaWQpLA0KICAgICAgICAgICAgcm1zZSA9IG1zZV4wLjUpICU+JSANCiAgcHVsbChybXNlKQ0KDQp0ZXN0X3JzbWUgLyB0cmFpbl9ybXNlDQoNCiMgb3ZlciBmaXQ/IGRlc3BpdGUgaGF2aW5nIGEgZ29vZCBiaWMNCmBgYA0KYGBge3J9DQpnbG11bHRpX2ZpdCA8LSBnbG11bHRpKA0KICBsb2coYXZlcmFnZV9wcmljZSkgfiAuLCAjIG1vZGVsIHRvIGZpdCwgaW4gdGhpcyBjYXNlLCBjaGFyZ2VzIHZhcmllcyB3aXRoIGV2ZXJ5dGhpbmcNCiAgbGV2ZWwgPSAyLCAjIGxldmVsID0gMiBtZWFucyB0cnkgcGFpcndpc2UgaW50ZXJhY3Rpb25zLiBsZXZlbCA9IDEgbWVhbnMgbWFpbiBlZmZlY3RzIG9ubHkNCiAgZGF0YSA9IHRyYWluLCAjIGRhdGEgdG8gdXNlIGZvciBmaXR0aW5nDQogIG1pbnNpemUgPSAwLCAjIG1pbiBzaXplIG9mIG1vZGVs
IHRvIHRyeSwgaW4gbnVtYmVyIG9mIHByZWRpY3RvcnMNCiAgbWF4c2l6ZSA9IDEwLCAjIG1heCBzaXplIHRvIHRyeSwgc2V0IHRvIC0xIGZvciB1bmxpbWl0ZWQNCiAgbWFyZ2luYWxpdHkgPSBUUlVFLCAjIG1hcmdpbmFsaXR5IHRydWUgbWVhbnMgaW5jbHVkZSBwYWlyd2lzZSBpbnRlcmFjdGlvbiBvbmx5IGlmIGJvdGggbWFpbiBlZmZlY3RzIHByZXNlbnQgaW4gbW9kZWwuICANCiAgbWV0aG9kID0gImQiLCAjIG1ldGhvZCAiZCIgbWVhbnMgdHJpYWwgcnVuLCB0byBnZXQgc2l6ZSBvZiBwcm9ibGVtLiBTZXQgdG8gImgiIGZvciBleGhhdXN0aXZlIHNlYXJjaCwgb3IgImciIGZvciBnZW5ldGljIGFsZ29yaXRobQ0KICBjb25mc2V0c2l6ZSA9IDEwMDAsICMgaG93IG1hbnkgbW9kZWxzIHNob3VsZCBnbG11bHRpKCkgcmV0dXJuPyBNdXN0IGJlIGxlc3MgdGhhbiB0b3RhbCBzaXplIG9mIHByb2JsZW0NCiAgcGxvdHR5ID0gRkFMU0UsICMgcHJvdmlkZSBwcm9ncmVzcyBwbG90cz8gR2VuZXJhbGx5IGFubm95aW5nLg0KICByZXBvcnQgPSBUUlVFLCAjIHByb3ZpZGUgcHJvZ3Jlc3MgcmVwb3J0cz8gR2VuZXJhbGx5IHVzZWZ1bC4NCiAgZml0ZnVuY3Rpb24gPSBsbSwgIyB1c2UgbG0oKSBhcyBmaXQgZnVuY3Rpb24uIENhbiBhbHNvIHVzZSBnbG0oKSBmb3IgbG9naXN0aWMgcmVncmVzc2lvbi4NCiAgY3JpdCA9IGFpYyAjIGNyaXRlcmlvbiBmb3Igc2VsZWN0aW5nIGJlc3QgbW9kZWxzLiANCikNCmBgYA0KDQoNCmBgYHtyfQ0KdHJhaW4gPSB0cmFpbiAlPiUgDQogIHNlbGVjdChhdmVyYWdlX3ByaWNlLCByZWdpb24sIHR5cGUsIHllYXIsIG1vbnRoLCB0b3RhbF92b2x1bWUsIGxhcmdlX2JhZ3MpDQoNCg0KYGBgDQoNCg0KDQoNCg0KYGBge3J9DQpsbV9tdWx0aSA8LSBsbShsb2coYXZlcmFnZV9wcmljZSl+IDEgKyByZWdpb24gKyB0eXBlICsgeWVhciArIG1vbnRoICsgdG90YWxfdm9sdW1lICsgbGFyZ2VfYmFncyArIHllYXI6dHlwZSArIG1vbnRoOnllYXIgKyByZWdpb246dG90YWxfdm9sdW1lICsgdHlwZTp0b3RhbF92b2x1bWUsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtX211bHRpKQ0KZ2xhbmNlKGxtX211bHRpKQ0KcGxvdChsbV9tdWx0aSkNCg0KbG0xMiA8LSBsbShsb2coYXZlcmFnZV9wcmljZSkgfiB0eXBlICsgcmVnaW9uICsgbW9udGggKyB5ZWFyICsgeDQwNDYgKyB0b3RhbF92b2x1bWUgKyBsYXJnZV9iYWdzICsgdHlwZTp0b3RhbF92b2x1bWUgKyB0eXBlOnJlZ2lvbiArIHR5cGU6eWVhciArIHJlZ2lvbjp0b3RhbF92b2x1bWUsIHRyYWluKQ0Kc3VtbWFyeShsbTEyKQ0KZ2xhbmNlKGxtMTIpDQpwbG90KGxtMTIpDQpgYGANCg0KDQoNCmBgYHtyfQ0KYXZvY2Fkb3NfMSA8LSBhdm9jYWRvcyAlPiUgDQogIG11dGF0ZShhY3Jvc3MoeDQwNDY6bGFyZ2VfYmFncywgfi54IC8gdG90YWxfdm9sdW1lKSkgDQpgYGANCg0KDQpgYGB7cn0NCmF2b2NhZG9zXzElPiUgDQogIGRwbHlyOjpzZWxlY3QoYXZlcmFnZV9wcmljZSwgaXMuZmFjdG9y
KSAlPiUgDQogIGdncGFpcnMoKSANCg0KIyBtb250aCwgdHlwZSBhbmQgcmVnaW9uIGFsbCBxdWl0ZSBzdHJvbmcNCg0KYXZvY2Fkb3NfMSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoaXMubnVtZXJpYykgJT4lIA0KICBnZ3BhaXJzKCkgDQpgYGANCg0KDQoNCmBgYHtyfQ0KbG00IDwtIGxtKGF2ZXJhZ2VfcHJpY2UgfiB0eXBlICsgcmVnaW9uICsgbW9udGgsIHRyYWluKQ0KDQpzdW1tYXJ5KGxtNCkNCnBsb3QobG00KQ0KYGBgDQoNCg0KYGBge3J9DQphdm9jYWRvc19yZXNpZCA8LSB0cmFpbiAlPiUgDQogIGFkZF9yZXNpZHVhbHMobG01KSAlPiUgDQogIGRwbHlyOjpzZWxlY3QoLWF2ZXJhZ2VfcHJpY2UsIC10eXBlLCAtcmVnaW9uLCAtbW9udGgpICU+JSANCiAgZHBseXI6OnNlbGVjdChyZXNpZCwgZXZlcnl0aGluZygpKSANCg0KDQphdm9jYWRvc19yZXNpZCAlPiUgDQogIGRwbHlyOjpzZWxlY3QocmVzaWQsIGlzLmZhY3RvcikgJT4lIA0KICBnZ3BhaXJzKCkgDQoNCiMgbW9udGgsIHR5cGUgYW5kIHJlZ2lvbiBhbGwgcXVpdGUgc3Ryb25nDQoNCmF2b2NhZG9zX3Jlc2lkICU+JSANCiAgZHBseXI6OnNlbGVjdChpcy5udW1lcmljKSAlPiUgDQogIGdncGFpcnMoKSANCg0KYGBgDQoNCg==